OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "GrCaps.h" | 8 #include "GrCaps.h" |
9 #include "GrContextFactory.h" | 9 #include "GrContextFactory.h" |
10 #include "Benchmark.h" | 10 #include "Benchmark.h" |
11 #include "ResultsWriter.h" | 11 #include "ResultsWriter.h" |
12 #include "SkCommandLineFlags.h" | 12 #include "SkCommandLineFlags.h" |
13 #include "SkOSFile.h" | 13 #include "SkOSFile.h" |
14 #include "SkStream.h" | 14 #include "SkStream.h" |
15 #include "SkSurface.h" | 15 #include "SkSurface.h" |
16 #include "SkTime.h" | 16 #include "SkTime.h" |
17 #include "SkTLList.h" | |
18 #include "SkThreadUtils.h" | |
17 #include "Stats.h" | 19 #include "Stats.h" |
18 #include "Timer.h" | 20 #include "Timer.h" |
19 #include "VisualSKPBench.h" | 21 #include "VisualSKPBench.h" |
20 #include "gl/GrGLDefines.h" | 22 #include "gl/GrGLDefines.h" |
23 #include "../private/SkMutex.h" | |
24 #include "../private/SkSemaphore.h" | |
25 #include "../private/SkGpuFenceSync.h" | |
21 | 26 |
22 // posix only for now | 27 // posix only for now |
23 #include <unistd.h> | 28 #include <unistd.h> |
24 #include <sys/types.h> | 29 #include <sys/types.h> |
25 #include <sys/wait.h> | 30 #include <sys/wait.h> |
26 | 31 |
27 /* | 32 /* |
28 * This is an experimental GPU only benchmarking program. The initial implementation will only | 33 * This is an experimental GPU only benchmarking program. The initial implementation will only |
29 * support SKPs. | 34 * support SKPs. |
30 */ | 35 */ |
31 | 36 |
32 // To get image decoders linked in we have to do the below magic | 37 // To get image decoders linked in we have to do the below magic |
33 #include "SkForceLinking.h" | 38 #include "SkForceLinking.h" |
34 #include "SkImageDecoder.h" | 39 #include "SkImageDecoder.h" |
35 __SK_FORCE_IMAGE_DECODER_LINKING; | 40 __SK_FORCE_IMAGE_DECODER_LINKING; |
36 | 41 |
37 | |
38 static const int kAutoTuneLoops = 0; | 42 static const int kAutoTuneLoops = 0; |
39 | 43 |
40 static const int kDefaultLoops = | 44 static const int kDefaultLoops = |
41 #ifdef SK_DEBUG | 45 #ifdef SK_DEBUG |
42 1; | 46 1; |
43 #else | 47 #else |
44 kAutoTuneLoops; | 48 kAutoTuneLoops; |
45 #endif | 49 #endif |
46 | 50 |
47 static SkString loops_help_txt() { | 51 static SkString loops_help_txt() { |
(...skipping 13 matching lines...) Expand all Loading... | |
61 "$ requires the end of the bench to match\n" | 65 "$ requires the end of the bench to match\n" |
62 "^ and $ requires an exact match\n" | 66 "^ and $ requires an exact match\n" |
63 "If a bench does not match any list entry,\n" | 67 "If a bench does not match any list entry,\n" |
64 "it is skipped unless some list entry starts with ~"); | 68 "it is skipped unless some list entry starts with ~"); |
65 DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag."); | 69 DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag."); |
66 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); | 70 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); |
67 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); | 71 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); |
68 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); | 72 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); |
69 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); | 73 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); |
70 DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs."); | 74 DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs."); |
75 DEFINE_bool(useBackgroundThread, true, "If false, kilobench will time cpu / gpu work together"); | |
76 DEFINE_bool(useMultiProcess, true, "If false, kilobench will run all tests in one process"); | |
71 | 77 |
72 static SkString humanize(double ms) { | 78 static SkString humanize(double ms) { |
73 return HumanizeMs(ms); | 79 return HumanizeMs(ms); |
74 } | 80 } |
75 #define HUMANIZE(ms) humanize(ms).c_str() | 81 #define HUMANIZE(ms) humanize(ms).c_str() |
76 | 82 |
77 namespace kilobench { | 83 namespace kilobench { |
78 class BenchmarkStream { | 84 class BenchmarkStream { |
79 public: | 85 public: |
80 BenchmarkStream() : fCurrentSKP(0) { | 86 BenchmarkStream() : fCurrentSKP(0) { |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
139 | 145 |
140 return nullptr; | 146 return nullptr; |
141 } | 147 } |
142 | 148 |
143 SkTArray<SkString> fSKPs; | 149 SkTArray<SkString> fSKPs; |
144 int fCurrentSKP; | 150 int fCurrentSKP; |
145 }; | 151 }; |
146 | 152 |
147 struct GPUTarget { | 153 struct GPUTarget { |
148 void setup() { | 154 void setup() { |
149 this->gl->makeCurrent(); | 155 fGL->makeCurrent(); |
150 // Make sure we're done with whatever came before. | 156 // Make sure we're done with whatever came before. |
151 SK_GL(*this->gl, Finish()); | 157 SK_GL(*fGL, Finish()); |
152 } | 158 } |
153 | 159 |
154 SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; } | 160 SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; } |
155 | 161 |
156 void endTiming() { | 162 void endTiming(bool usePlatformSwapBuffers) { |
djsollen
2016/01/25 16:54:25
why pass in a param that you never use?
| |
157 if (this->gl) { | 163 if (fGL) { |
158 SK_GL(*this->gl, Flush()); | 164 SK_GL(*fGL, Flush()); |
159 this->gl->swapBuffers(); | 165 fGL->swapBuffers(); |
160 } | 166 } |
161 } | 167 } |
162 void fence() { | 168 void fence() { |
djsollen
2016/01/25 16:54:25
seems like a poor function name when the call does
| |
163 SK_GL(*this->gl, Finish()); | 169 SK_GL(*fGL, Finish()); |
164 } | 170 } |
165 | 171 |
166 bool needsFrameTiming(int* maxFrameLag) const { | 172 bool needsFrameTiming(int* maxFrameLag) const { |
167 if (!this->gl->getMaxGpuFrameLag(maxFrameLag)) { | 173 if (!fGL->getMaxGpuFrameLag(maxFrameLag)) { |
168 // Frame lag is unknown. | 174 // Frame lag is unknown. |
169 *maxFrameLag = FLAGS_gpuFrameLag; | 175 *maxFrameLag = FLAGS_gpuFrameLag; |
170 } | 176 } |
171 return true; | 177 return true; |
172 } | 178 } |
173 | 179 |
174 bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText, | 180 bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText, |
175 GrContextFactory::GLContextType ctxType, | 181 GrContextFactory::GLContextType ctxType, |
176 GrContextFactory::GLContextOptions ctxOptions, int numSamples) { | 182 GrContextFactory::GLContextOptions ctxOptions, int numSamples) { |
177 GrContext* context = factory->get(ctxType, ctxOptions); | 183 GrContext* context = factory->get(ctxType, ctxOptions); |
178 int maxRTSize = context->caps()->maxRenderTargetSize(); | 184 int maxRTSize = context->caps()->maxRenderTargetSize(); |
179 SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSize), | 185 SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSize), |
180 SkTMin(bench->getSize().fY, maxRTSize), | 186 SkTMin(bench->getSize().fY, maxRTSize), |
181 kN32_SkColorType, kPremul_SkAlphaType); | 187 kN32_SkColorType, kPremul_SkAlphaType); |
182 uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : | 188 uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : |
183 0; | 189 0; |
184 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); | 190 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); |
185 this->surface.reset(SkSurface::NewRenderTarget(context, | 191 fSurface.reset(SkSurface::NewRenderTarget(context, |
186 SkSurface::kNo_Budgeted, info, | 192 SkSurface::kNo_Budgeted, info, |
187 numSamples, &props)); | 193 numSamples, &props)); |
188 this->gl = factory->getContextInfo(ctxType, ctxOptions).fGLContext; | 194 fGL = factory->getContextInfo(ctxType, ctxOptions).fGLContext; |
189 if (!this->surface.get()) { | 195 if (!fSurface.get()) { |
190 return false; | 196 return false; |
191 } | 197 } |
192 | 198 |
193 // Kilobench should only be used on platforms with fence sync support | 199 // Kilobench should only be used on platforms with fence sync support |
194 SkASSERT(this->gl->fenceSyncSupport()); | 200 SkASSERT(fGL->fenceSyncSupport()); |
195 return true; | 201 return true; |
196 } | 202 } |
197 | 203 |
198 SkCanvas* getCanvas() const { | 204 SkCanvas* getCanvas() const { |
199 if (!surface.get()) { | 205 if (!fSurface.get()) { |
200 return nullptr; | 206 return nullptr; |
201 } | 207 } |
202 return surface->getCanvas(); | 208 return fSurface->getCanvas(); |
203 } | 209 } |
204 | 210 |
205 bool capturePixels(SkBitmap* bmp) { | 211 bool capturePixels(SkBitmap* bmp) { |
206 SkCanvas* canvas = this->getCanvas(); | 212 SkCanvas* canvas = this->getCanvas(); |
207 if (!canvas) { | 213 if (!canvas) { |
208 return false; | 214 return false; |
209 } | 215 } |
210 bmp->setInfo(canvas->imageInfo()); | 216 bmp->setInfo(canvas->imageInfo()); |
211 if (!canvas->readPixels(bmp, 0, 0)) { | 217 if (!canvas->readPixels(bmp, 0, 0)) { |
212 SkDebugf("Can't read canvas pixels.\n"); | 218 SkDebugf("Can't read canvas pixels.\n"); |
213 return false; | 219 return false; |
214 } | 220 } |
215 return true; | 221 return true; |
216 } | 222 } |
217 | 223 |
224 SkGLContext* gl() { return fGL; } | |
225 | |
218 private: | 226 private: |
219 //const Config config; | 227 SkGLContext* fGL; |
220 SkGLContext* gl; | 228 SkAutoTDelete<SkSurface> fSurface; |
221 SkAutoTDelete<SkSurface> surface; | |
222 }; | 229 }; |
223 | 230 |
224 static bool write_canvas_png(GPUTarget* target, const SkString& filename) { | 231 static bool write_canvas_png(GPUTarget* target, const SkString& filename) { |
225 | 232 |
226 if (filename.isEmpty()) { | 233 if (filename.isEmpty()) { |
227 return false; | 234 return false; |
228 } | 235 } |
229 if (target->getCanvas() && | 236 if (target->getCanvas() && |
230 kUnknown_SkColorType == target->getCanvas()->imageInfo().colorType()) { | 237 kUnknown_SkColorType == target->getCanvas()->imageInfo().colorType()) { |
231 return false; | 238 return false; |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
269 return 1; | 276 return 1; |
270 } | 277 } |
271 if (loops > FLAGS_maxLoops) { | 278 if (loops > FLAGS_maxLoops) { |
272 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loops, FLAGS_maxLoops); | 279 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loops, FLAGS_maxLoops); |
273 return FLAGS_maxLoops; | 280 return FLAGS_maxLoops; |
274 } | 281 } |
275 return loops; | 282 return loops; |
276 } | 283 } |
277 | 284 |
278 static double now_ms() { return SkTime::GetNSecs() * 1e-6; } | 285 static double now_ms() { return SkTime::GetNSecs() * 1e-6; } |
279 static double time(int loops, Benchmark* bench, GPUTarget* target) { | 286 |
287 struct TimingThread { | |
288 TimingThread(SkGLContext* mainContext) | |
289 : fFenceSync(mainContext->fenceSync()) | |
290 , fMainContext(mainContext) | |
291 , fDone(false) {} | |
292 | |
293 static void Loop(void* data) { | |
294 TimingThread* timingThread = reinterpret_cast<TimingThread*>(data); | |
295 timingThread->timingLoop(); | |
296 } | |
297 | |
298 // To ensure waiting for the sync actually does something, we check to make sure the we exceed | |
299 // some small value | |
300 const double kMinElapsed = 1e-6; | |
301 bool sanity(double start) const { | |
302 double elapsed = now_ms() - start; | |
303 return elapsed > kMinElapsed; | |
304 } | |
305 | |
306 void waitFence(SkPlatformGpuFence sync) { | |
307 SkDEBUGCODE(double start = now_ms()); | |
308 fFenceSync->waitFence(sync, false); | |
309 SkASSERT(sanity(start)); | |
310 } | |
311 | |
312 void timingLoop() { | |
313 // Create a context which shares display lists with the main thread | |
314 SkAutoTDelete<SkGLContext> glContext(SkCreatePlatformGLContext(kNone_GrGLStandard, | |
315 fMainContext)); | |
316 glContext->makeCurrent(); | |
317 | |
318 // Basic timing methodology is: | |
319 // 1) Wait on semaphore until main thread indicates its time to start timing the frame | |
320 // 2) Wait on frame start sync, record time. This is start of the frame. | |
321 // 3) Wait on semaphore until main thread indicates its time to finish timing the frame | |
322 // 4) Wait on frame end sync, record time. FrameEndTime - FrameStartTime = frame time | |
323 // 5) Wait on semaphore until main thread indicates we should time the next frame or quit | |
324 while (true) { | |
325 fSemaphore.wait(); | |
326 | |
327 // get start sync | |
328 SkPlatformGpuFence startSync = this->popStartSync(); | |
329 | |
330 // wait on sync | |
331 this->waitFence(startSync); | |
332 double start = kilobench::now_ms(); | |
333 | |
334 // do we want to sleep here? | |
335 // wait for end sync | |
336 fSemaphore.wait(); | |
337 | |
338 // get end sync | |
339 SkPlatformGpuFence endSync = this->popEndSync(); | |
340 | |
341 // wait on sync | |
342 this->waitFence(endSync); | |
343 double elapsed = kilobench::now_ms() - start; | |
344 | |
345 // No mutex needed, client won't touch timings until we're done | |
346 fTimings.push_back(elapsed); | |
347 | |
348 // clean up fences | |
349 fFenceSync->deleteFence(startSync); | |
350 fFenceSync->deleteFence(endSync); | |
351 | |
352 fSemaphore.wait(); | |
353 if (this->isDone()) { | |
354 break; | |
355 } | |
356 } | |
357 } | |
358 | |
359 void pushStartSync() { this->pushSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); } | |
360 | |
361 SkPlatformGpuFence popStartSync() { | |
362 return this->popSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); | |
363 } | |
364 | |
365 void pushEndSync() { this->pushSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); } | |
366 | |
367 SkPlatformGpuFence popEndSync() { return this->popSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); } | |
368 | |
369 void setDone() { | |
370 SkAutoMutexAcquire done(fDoneMutex); | |
371 fDone = true; | |
372 fSemaphore.signal(); | |
373 } | |
374 | |
375 typedef SkTLList<SkPlatformGpuFence, 1> SyncQueue; | |
376 | |
377 void pushSync(SyncQueue* queue, SkMutex* mutex) { | |
378 SkAutoMutexAcquire am(mutex); | |
379 *queue->addToHead() = fFenceSync->insertFence(); | |
380 fSemaphore.signal(); | |
381 } | |
382 | |
383 SkPlatformGpuFence popSync(SyncQueue* queue, SkMutex* mutex) { | |
384 SkAutoMutexAcquire am(mutex); | |
385 SkPlatformGpuFence sync = *queue->head(); | |
386 queue->popHead(); | |
387 return sync; | |
388 } | |
389 | |
390 bool isDone() { | |
391 SkAutoMutexAcquire am1(fFrameStartSyncsMutex); | |
392 SkAutoMutexAcquire done(fDoneMutex); | |
393 if (fDone && fFrameStartSyncs.isEmpty()) { | |
394 return true; | |
395 } else { | |
396 return false; | |
397 } | |
398 } | |
399 | |
400 const SkTArray<double>& timings() const { SkASSERT(fDone); return fTimings; } | |
401 | |
402 private: | |
403 SkGpuFenceSync* fFenceSync; | |
404 SkSemaphore fSemaphore; | |
405 SkMutex fFrameStartSyncsMutex; | |
406 SyncQueue fFrameStartSyncs; | |
407 SkMutex fFrameEndSyncsMutex; | |
408 SyncQueue fFrameEndSyncs; | |
409 SkTArray<double> fTimings; | |
410 SkMutex fDoneMutex; | |
411 SkGLContext* fMainContext; | |
412 bool fDone; | |
413 }; | |
414 | |
415 static double time(int loops, Benchmark* bench, GPUTarget* target, TimingThread* timingThread) { | |
280 SkCanvas* canvas = target->getCanvas(); | 416 SkCanvas* canvas = target->getCanvas(); |
281 if (canvas) { | 417 canvas->clear(SK_ColorWHITE); |
282 canvas->clear(SK_ColorWHITE); | 418 bench->preDraw(canvas); |
419 | |
420 if (timingThread) { | |
421 timingThread->pushStartSync(); | |
283 } | 422 } |
284 bench->preDraw(canvas); | |
285 double start = now_ms(); | 423 double start = now_ms(); |
286 canvas = target->beginTiming(canvas); | 424 canvas = target->beginTiming(canvas); |
287 bench->draw(loops, canvas); | 425 bench->draw(loops, canvas); |
288 if (canvas) { | 426 canvas->flush(); |
289 canvas->flush(); | 427 target->endTiming(timingThread ? true : false); |
428 | |
429 double elapsed = now_ms() - start; | |
430 if (timingThread) { | |
431 timingThread->pushEndSync(); | |
432 timingThread->setDone(); | |
290 } | 433 } |
291 target->endTiming(); | |
292 double elapsed = now_ms() - start; | |
293 bench->postDraw(canvas); | 434 bench->postDraw(canvas); |
294 return elapsed; | 435 return elapsed; |
295 } | 436 } |
296 | 437 |
438 // TODO For now we don't use the background timing thread to tune loops | |
297 static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameLag) { | 439 static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameLag) { |
298 // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. | 440 // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. |
299 int loops = bench->calculateLoops(FLAGS_loops); | 441 int loops = bench->calculateLoops(FLAGS_loops); |
300 if (kAutoTuneLoops == loops) { | 442 if (kAutoTuneLoops == loops) { |
301 loops = 1; | 443 loops = 1; |
302 double elapsed = 0; | 444 double elapsed = 0; |
303 do { | 445 do { |
304 if (1<<30 == loops) { | 446 if (1<<30 == loops) { |
305 // We're about to wrap. Something's wrong with the bench. | 447 // We're about to wrap. Something's wrong with the bench. |
306 loops = 0; | 448 loops = 0; |
307 break; | 449 break; |
308 } | 450 } |
309 loops *= 2; | 451 loops *= 2; |
310 // If the GPU lets frames lag at all, we need to make sure we're timing | 452 // If the GPU lets frames lag at all, we need to make sure we're timing |
311 // _this_ round, not still timing last round. | 453 // _this_ round, not still timing last round. |
312 for (int i = 0; i < maxGpuFrameLag; i++) { | 454 for (int i = 0; i < maxGpuFrameLag; i++) { |
313 elapsed = time(loops, bench, target); | 455 elapsed = time(loops, bench, target, nullptr); |
314 } | 456 } |
315 } while (elapsed < FLAGS_gpuMs); | 457 } while (elapsed < FLAGS_gpuMs); |
316 | 458 |
317 // We've overshot at least a little. Scale back linearly. | 459 // We've overshot at least a little. Scale back linearly. |
318 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); | 460 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); |
319 loops = clamp_loops(loops); | 461 loops = clamp_loops(loops); |
320 | 462 |
321 // Make sure we're not still timing our calibration. | 463 // Make sure we're not still timing our calibration. |
322 target->fence(); | 464 target->fence(); |
323 } else { | 465 } else { |
324 loops = detect_forever_loops(loops); | 466 loops = detect_forever_loops(loops); |
325 } | 467 } |
326 | 468 |
327 // Pretty much the same deal as the calibration: do some warmup to make | 469 // Pretty much the same deal as the calibration: do some warmup to make |
328 // sure we're timing steady-state pipelined frames. | 470 // sure we're timing steady-state pipelined frames. |
329 for (int i = 0; i < maxGpuFrameLag - 1; i++) { | 471 for (int i = 0; i < maxGpuFrameLag - 1; i++) { |
330 time(loops, bench, target); | 472 time(loops, bench, target, nullptr); |
331 } | 473 } |
332 | 474 |
333 return loops; | 475 return loops; |
334 } | 476 } |
335 | 477 |
336 struct AutoSetupContextBenchAndTarget { | 478 struct AutoSetupContextBenchAndTarget { |
337 AutoSetupContextBenchAndTarget(Benchmark* bench) : fBenchmark(bench) { | 479 AutoSetupContextBenchAndTarget(Benchmark* bench) : fBenchmark(bench) { |
338 GrContextOptions grContextOpts; | 480 GrContextOptions grContextOpts; |
339 fCtxFactory.reset(new GrContextFactory(grContextOpts)); | 481 fCtxFactory.reset(new GrContextFactory(grContextOpts)); |
340 | 482 |
341 SkAssertResult(fTarget.init(bench, fCtxFactory, false, | 483 SkAssertResult(fTarget.init(bench, fCtxFactory, false, |
342 GrContextFactory::kNative_GLContextType, | 484 GrContextFactory::kNative_GLContextType, |
343 GrContextFactory::kNone_GLContextOptions, 0)); | 485 GrContextFactory::kNone_GLContextOptions, 0)); |
344 | 486 |
345 fCanvas = fTarget.getCanvas(); | 487 fCanvas = fTarget.getCanvas(); |
346 fTarget.setup(); | 488 fTarget.setup(); |
347 | 489 |
348 bench->perCanvasPreDraw(fCanvas); | 490 bench->perCanvasPreDraw(fCanvas); |
349 fTarget.needsFrameTiming(&fMaxFrameLag); | 491 fTarget.needsFrameTiming(&fMaxFrameLag); |
350 } | 492 } |
351 | 493 |
352 int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); } | 494 int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); } |
353 | 495 |
354 double timeSample(int loops) { | 496 double timeSample(int loops, TimingThread* timingThread) { |
355 for (int i = 0; i < fMaxFrameLag; i++) { | 497 for (int i = 0; i < fMaxFrameLag; i++) { |
356 time(loops, fBenchmark, &fTarget); | 498 time(loops, fBenchmark, &fTarget, timingThread); |
357 } | 499 } |
358 | 500 |
359 return time(loops, fBenchmark, &fTarget) / loops; | 501 return time(loops, fBenchmark, &fTarget, timingThread) / loops; |
360 } | 502 } |
503 | |
361 void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); } | 504 void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); } |
362 | 505 |
363 SkAutoTDelete<GrContextFactory> fCtxFactory; | 506 SkAutoTDelete<GrContextFactory> fCtxFactory; |
364 GPUTarget fTarget; | 507 GPUTarget fTarget; |
365 SkCanvas* fCanvas; | 508 SkCanvas* fCanvas; |
366 Benchmark* fBenchmark; | 509 Benchmark* fBenchmark; |
367 int fMaxFrameLag; | 510 int fMaxFrameLag; |
368 }; | 511 }; |
369 | 512 |
370 int setup_loops(Benchmark* bench) { | 513 int setup_loops(Benchmark* bench) { |
371 AutoSetupContextBenchAndTarget ascbt(bench); | 514 AutoSetupContextBenchAndTarget ascbt(bench); |
372 int loops = ascbt.getLoops(); | 515 int loops = ascbt.getLoops(); |
373 ascbt.teardownBench(); | 516 ascbt.teardownBench(); |
374 | 517 |
375 if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) { | 518 if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) { |
376 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu"); | 519 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu"); |
377 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName()); | 520 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName()); |
378 pngFilename.append(".png"); | 521 pngFilename.append(".png"); |
379 write_canvas_png(&ascbt.fTarget, pngFilename); | 522 write_canvas_png(&ascbt.fTarget, pngFilename); |
380 } | 523 } |
381 return loops; | 524 return loops; |
382 } | 525 } |
383 | 526 |
384 double time_sample(Benchmark* bench, int loops) { | 527 double time_sample(Benchmark* bench, int loops) { |
385 AutoSetupContextBenchAndTarget ascbt(bench); | 528 AutoSetupContextBenchAndTarget ascbt(bench); |
386 double sample = ascbt.timeSample(loops); | 529 |
530 double sample; | |
531 if (FLAGS_useBackgroundThread) { | |
532 TimingThread timingThread(ascbt.fTarget.gl()); | |
533 SkAutoTDelete<SkThread> nativeThread(new SkThread(TimingThread::Loop, &timingThread)); | |
534 nativeThread->start(); | |
535 sample = ascbt.timeSample(loops, &timingThread); | |
536 nativeThread->join(); | |
537 | |
538 // TODO get these times out of here | |
539 for (int i = 0; i < timingThread.timings().count(); i++) { | |
540 SkDebugf("gpu times %s\n", HUMANIZE(timingThread.timings()[i])); | |
541 } | |
542 } else { | |
543 sample = ascbt.timeSample(loops, nullptr); | |
544 } | |
545 | |
387 ascbt.teardownBench(); | 546 ascbt.teardownBench(); |
388 | 547 |
389 return sample; | 548 return sample; |
390 } | 549 } |
391 | 550 |
392 } // namespace kilobench | 551 } // namespace kilobench |
393 | 552 |
394 static const int kOutResultSize = 1024; | 553 static const int kOutResultSize = 1024; |
395 | 554 |
396 int kilobench_main() { | 555 int kilobench_main() { |
397 kilobench::BenchmarkStream benchStream; | 556 kilobench::BenchmarkStream benchStream; |
398 | 557 |
399 SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", | 558 SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", |
400 FLAGS_samples, "samples"); | 559 FLAGS_samples, "samples"); |
401 | 560 |
402 int descriptors[2]; | 561 int descriptors[2]; |
403 if (pipe(descriptors) != 0) { | 562 if (pipe(descriptors) != 0) { |
404 SkFAIL("Failed to open a pipe\n"); | 563 SkFAIL("Failed to open a pipe\n"); |
405 } | 564 } |
406 | 565 |
407 while (Benchmark* b = benchStream.next()) { | 566 while (Benchmark* b = benchStream.next()) { |
408 SkAutoTDelete<Benchmark> bench(b); | 567 SkAutoTDelete<Benchmark> bench(b); |
409 | 568 |
410 int loops; | 569 int loops = 1; |
411 SkTArray<double> samples; | 570 SkTArray<double> samples; |
412 for (int i = 0; i < FLAGS_samples + 1; i++) { | 571 for (int i = 0; i < FLAGS_samples + 1; i++) { |
413 // We fork off a new process to setup the grcontext and run the test while we wait | 572 // We fork off a new process to setup the grcontext and run the test while we wait |
414 int childPid = fork(); | 573 if (FLAGS_useMultiProcess) { |
415 if (childPid > 0) { | 574 int childPid = fork(); |
416 char result[kOutResultSize]; | 575 if (childPid > 0) { |
417 if (read(descriptors[0], result, kOutResultSize) < 0) { | 576 char result[kOutResultSize]; |
418 SkFAIL("Failed to read from pipe\n"); | 577 if (read(descriptors[0], result, kOutResultSize) < 0) { |
578 SkFAIL("Failed to read from pipe\n"); | |
579 } | |
580 | |
581 // if samples == 0 then parse # of loops | |
582 // else parse float | |
583 if (i == 0) { | |
584 sscanf(result, "%d", &loops); | |
585 } else { | |
586 sscanf(result, "%lf", &samples.push_back()); | |
587 } | |
588 | |
589 // wait until exit | |
590 int status; | |
591 waitpid(childPid, &status, 0); | |
592 } else if (0 == childPid) { | |
593 char result[kOutResultSize]; | |
594 if (i == 0) { | |
595 sprintf(result, "%d", kilobench::setup_loops(bench)); | |
596 } else { | |
597 sprintf(result, "%lf", kilobench::time_sample(bench, loops)); | |
598 } | |
599 | |
600 // Make sure to write the null terminator | |
601 if (write(descriptors[1], result, strlen(result) + 1) < 0) { | |
602 SkFAIL("Failed to write to pipe\n"); | |
603 } | |
604 return 0; | |
605 } else { | |
606 SkFAIL("Fork failed\n"); | |
419 } | 607 } |
420 | 608 } else { |
421 // if samples == 0 then parse # of loops | |
422 // else parse float | |
423 if (i == 0) { | 609 if (i == 0) { |
424 sscanf(result, "%d", &loops); | 610 loops = kilobench::setup_loops(bench); |
425 } else { | 611 } else { |
426 sscanf(result, "%lf", &samples.push_back()); | 612 samples.push_back() = kilobench::time_sample(bench, loops); |
427 } | 613 } |
428 | |
429 // wait until exit | |
430 int status; | |
431 waitpid(childPid, &status, 0); | |
432 } else if (0 == childPid) { | |
433 char result[kOutResultSize]; | |
434 if (i == 0) { | |
435 sprintf(result, "%d", kilobench::setup_loops(bench)); | |
436 } else { | |
437 sprintf(result, "%lf", kilobench::time_sample(bench, loops)); | |
438 } | |
439 | |
440 // Make sure to write the null terminator | |
441 if (write(descriptors[1], result, strlen(result) + 1) < 0) { | |
442 SkFAIL("Failed to write to pipe\n"); | |
443 } | |
444 return 0; | |
445 } else { | |
446 SkFAIL("Fork failed\n"); | |
447 } | 614 } |
448 } | 615 } |
449 | 616 |
450 Stats stats(samples); | 617 Stats stats(samples); |
451 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; | 618 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; |
452 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" | 619 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" |
453 , loops | 620 , loops |
454 , HUMANIZE(stats.min) | 621 , HUMANIZE(stats.min) |
455 , HUMANIZE(stats.median) | 622 , HUMANIZE(stats.median) |
456 , HUMANIZE(stats.mean) | 623 , HUMANIZE(stats.mean) |
457 , HUMANIZE(stats.max) | 624 , HUMANIZE(stats.max) |
458 , stddev_percent | 625 , stddev_percent |
459 , stats.plot.c_str() | 626 , stats.plot.c_str() |
460 , "gpu" | 627 , "gpu" |
461 , bench->getUniqueName() | 628 , bench->getUniqueName() |
462 ); | 629 ); |
463 | 630 |
464 } | 631 } |
465 return 0; | 632 return 0; |
466 } | 633 } |
467 | 634 |
468 #if !defined SK_BUILD_FOR_IOS | 635 #if !defined SK_BUILD_FOR_IOS |
469 int main(int argc, char** argv) { | 636 int main(int argc, char** argv) { |
470 SkCommandLineFlags::Parse(argc, argv); | 637 SkCommandLineFlags::Parse(argc, argv); |
471 return kilobench_main(); | 638 return kilobench_main(); |
472 } | 639 } |
473 #endif | 640 #endif |
OLD | NEW |