Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "GrCaps.h" | 8 #include "GrCaps.h" |
| 9 #include "GrContextFactory.h" | 9 #include "GrContextFactory.h" |
| 10 #include "Benchmark.h" | 10 #include "Benchmark.h" |
| 11 #include "ResultsWriter.h" | 11 #include "ResultsWriter.h" |
| 12 #include "SkCommandLineFlags.h" | 12 #include "SkCommandLineFlags.h" |
| 13 #include "SkOSFile.h" | 13 #include "SkOSFile.h" |
| 14 #include "SkStream.h" | 14 #include "SkStream.h" |
| 15 #include "SkSurface.h" | 15 #include "SkSurface.h" |
| 16 #include "SkTime.h" | 16 #include "SkTime.h" |
| 17 #include "SkTLList.h" | |
| 18 #include "SkThreadUtils.h" | |
| 17 #include "Stats.h" | 19 #include "Stats.h" |
| 18 #include "Timer.h" | 20 #include "Timer.h" |
| 19 #include "VisualSKPBench.h" | 21 #include "VisualSKPBench.h" |
| 20 #include "gl/GrGLDefines.h" | 22 #include "gl/GrGLDefines.h" |
| 23 #include "../private/SkMutex.h" | |
| 24 #include "../private/SkSemaphore.h" | |
| 25 #include "../private/SkGpuFenceSync.h" | |
| 21 | 26 |
| 22 // posix only for now | 27 // posix only for now |
| 23 #include <unistd.h> | 28 #include <unistd.h> |
| 24 #include <sys/types.h> | 29 #include <sys/types.h> |
| 25 #include <sys/wait.h> | 30 #include <sys/wait.h> |
| 26 | 31 |
| 27 /* | 32 /* |
| 28 * This is an experimental GPU only benchmarking program. The initial implementation will only | 33 * This is an experimental GPU only benchmarking program. The initial implementation will only |
| 29 * support SKPs. | 34 * support SKPs. |
| 30 */ | 35 */ |
| 31 | 36 |
| 32 // To get image decoders linked in we have to do the below magic | 37 // To get image decoders linked in we have to do the below magic |
| 33 #include "SkForceLinking.h" | 38 #include "SkForceLinking.h" |
| 34 #include "SkImageDecoder.h" | 39 #include "SkImageDecoder.h" |
| 35 __SK_FORCE_IMAGE_DECODER_LINKING; | 40 __SK_FORCE_IMAGE_DECODER_LINKING; |
| 36 | 41 |
| 37 | |
| 38 static const int kAutoTuneLoops = 0; | 42 static const int kAutoTuneLoops = 0; |
| 39 | 43 |
| 40 static const int kDefaultLoops = | 44 static const int kDefaultLoops = |
| 41 #ifdef SK_DEBUG | 45 #ifdef SK_DEBUG |
| 42 1; | 46 1; |
| 43 #else | 47 #else |
| 44 kAutoTuneLoops; | 48 kAutoTuneLoops; |
| 45 #endif | 49 #endif |
| 46 | 50 |
| 47 static SkString loops_help_txt() { | 51 static SkString loops_help_txt() { |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 61 "$ requires the end of the bench to match\n" | 65 "$ requires the end of the bench to match\n" |
| 62 "^ and $ requires an exact match\n" | 66 "^ and $ requires an exact match\n" |
| 63 "If a bench does not match any list entry,\n" | 67 "If a bench does not match any list entry,\n" |
| 64 "it is skipped unless some list entry starts with ~"); | 68 "it is skipped unless some list entry starts with ~"); |
| 65 DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag."); | 69 DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag."); |
| 66 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); | 70 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); |
| 67 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); | 71 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); |
| 68 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); | 72 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); |
| 69 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); | 73 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); |
| 70 DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs."); | 74 DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs."); |
| 75 DEFINE_bool(useBackgroundThread, true, "If false, kilobench will time cpu / gpu work together"); | |
| 76 DEFINE_bool(useMultiProcess, true, "If false, kilobench will run all tests in one process"); | |
| 71 | 77 |
| 72 static SkString humanize(double ms) { | 78 static SkString humanize(double ms) { |
| 73 return HumanizeMs(ms); | 79 return HumanizeMs(ms); |
| 74 } | 80 } |
| 75 #define HUMANIZE(ms) humanize(ms).c_str() | 81 #define HUMANIZE(ms) humanize(ms).c_str() |
| 76 | 82 |
| 77 namespace kilobench { | 83 namespace kilobench { |
| 78 class BenchmarkStream { | 84 class BenchmarkStream { |
| 79 public: | 85 public: |
| 80 BenchmarkStream() : fCurrentSKP(0) { | 86 BenchmarkStream() : fCurrentSKP(0) { |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 139 | 145 |
| 140 return nullptr; | 146 return nullptr; |
| 141 } | 147 } |
| 142 | 148 |
| 143 SkTArray<SkString> fSKPs; | 149 SkTArray<SkString> fSKPs; |
| 144 int fCurrentSKP; | 150 int fCurrentSKP; |
| 145 }; | 151 }; |
| 146 | 152 |
| 147 struct GPUTarget { | 153 struct GPUTarget { |
| 148 void setup() { | 154 void setup() { |
| 149 this->gl->makeCurrent(); | 155 this->fGL->makeCurrent(); |
|
bsalomon
2016/01/20 18:54:52
We don't really need all these thises (these?, thi
joshualitt
2016/01/20 21:14:52
Acknowledged.
| |
| 150 // Make sure we're done with whatever came before. | 156 // Make sure we're done with whatever came before. |
| 151 SK_GL(*this->gl, Finish()); | 157 SK_GL(*this->fGL, Finish()); |
| 152 } | 158 } |
| 153 | 159 |
| 154 SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; } | 160 SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; } |
| 155 | 161 |
| 156 void endTiming() { | 162 void endTiming(bool usePlatformSwapBuffers) { |
| 157 if (this->gl) { | 163 if (this->fGL) { |
| 158 SK_GL(*this->gl, Flush()); | 164 SK_GL(*this->fGL, Flush()); |
| 159 this->gl->swapBuffers(); | 165 this->fGL->swapBuffers(usePlatformSwapBuffers); |
| 160 } | 166 } |
| 161 } | 167 } |
| 162 void fence() { | 168 void fence() { |
| 163 SK_GL(*this->gl, Finish()); | 169 SK_GL(*this->fGL, Finish()); |
| 164 } | 170 } |
| 165 | 171 |
| 166 bool needsFrameTiming(int* maxFrameLag) const { | 172 bool needsFrameTiming(int* maxFrameLag) const { |
| 167 if (!this->gl->getMaxGpuFrameLag(maxFrameLag)) { | 173 if (!this->fGL->getMaxGpuFrameLag(maxFrameLag)) { |
| 168 // Frame lag is unknown. | 174 // Frame lag is unknown. |
| 169 *maxFrameLag = FLAGS_gpuFrameLag; | 175 *maxFrameLag = FLAGS_gpuFrameLag; |
| 170 } | 176 } |
| 171 return true; | 177 return true; |
| 172 } | 178 } |
| 173 | 179 |
| 174 bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText, | 180 bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText, |
| 175 GrContextFactory::GLContextType ctxType, | 181 GrContextFactory::GLContextType ctxType, |
| 176 GrContextFactory::GLContextOptions ctxOptions, int numSamples) { | 182 GrContextFactory::GLContextOptions ctxOptions, int numSamples) { |
| 177 GrContext* context = factory->get(ctxType, ctxOptions); | 183 GrContext* context = factory->get(ctxType, ctxOptions); |
| 178 int maxRTSize = context->caps()->maxRenderTargetSize(); | 184 int maxRTSize = context->caps()->maxRenderTargetSize(); |
| 179 SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSize), | 185 SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSize), |
| 180 SkTMin(bench->getSize().fY, maxRTSize), | 186 SkTMin(bench->getSize().fY, maxRTSize), |
| 181 kN32_SkColorType, kPremul_SkAlphaType); | 187 kN32_SkColorType, kPremul_SkAlphaType); |
| 182 uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : | 188 uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : |
| 183 0; | 189 0; |
| 184 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); | 190 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); |
| 185 this->surface.reset(SkSurface::NewRenderTarget(context, | 191 this->surface.reset(SkSurface::NewRenderTarget(context, |
| 186 SkSurface::kNo_Budgeted, info, | 192 SkSurface::kNo_Budgeted, info, |
| 187 numSamples, &props)); | 193 numSamples, &props)); |
| 188 this->gl = factory->getContextInfo(ctxType, ctxOptions).fGLContext; | 194 this->fGL = factory->getContextInfo(ctxType, ctxOptions).fGLContext; |
| 189 if (!this->surface.get()) { | 195 if (!this->surface.get()) { |
| 190 return false; | 196 return false; |
| 191 } | 197 } |
| 192 | 198 |
| 193 // Kilobench should only be used on platforms with fence sync support | 199 // Kilobench should only be used on platforms with fence sync support |
| 194 SkASSERT(this->gl->fenceSyncSupport()); | 200 SkASSERT(this->fGL->fenceSyncSupport()); |
| 195 return true; | 201 return true; |
| 196 } | 202 } |
| 197 | 203 |
| 198 SkCanvas* getCanvas() const { | 204 SkCanvas* getCanvas() const { |
| 199 if (!surface.get()) { | 205 if (!surface.get()) { |
| 200 return nullptr; | 206 return nullptr; |
| 201 } | 207 } |
| 202 return surface->getCanvas(); | 208 return surface->getCanvas(); |
| 203 } | 209 } |
| 204 | 210 |
| 205 bool capturePixels(SkBitmap* bmp) { | 211 bool capturePixels(SkBitmap* bmp) { |
| 206 SkCanvas* canvas = this->getCanvas(); | 212 SkCanvas* canvas = this->getCanvas(); |
| 207 if (!canvas) { | 213 if (!canvas) { |
| 208 return false; | 214 return false; |
| 209 } | 215 } |
| 210 bmp->setInfo(canvas->imageInfo()); | 216 bmp->setInfo(canvas->imageInfo()); |
| 211 if (!canvas->readPixels(bmp, 0, 0)) { | 217 if (!canvas->readPixels(bmp, 0, 0)) { |
| 212 SkDebugf("Can't read canvas pixels.\n"); | 218 SkDebugf("Can't read canvas pixels.\n"); |
| 213 return false; | 219 return false; |
| 214 } | 220 } |
| 215 return true; | 221 return true; |
| 216 } | 222 } |
| 217 | 223 |
| 224 SkGLContext* gl() { return fGL; } | |
| 225 | |
| 218 private: | 226 private: |
| 219 //const Config config; | 227 SkGLContext* fGL; |
| 220 SkGLContext* gl; | |
| 221 SkAutoTDelete<SkSurface> surface; | 228 SkAutoTDelete<SkSurface> surface; |
| 222 }; | 229 }; |
| 223 | 230 |
| 224 static bool write_canvas_png(GPUTarget* target, const SkString& filename) { | 231 static bool write_canvas_png(GPUTarget* target, const SkString& filename) { |
| 225 | 232 |
| 226 if (filename.isEmpty()) { | 233 if (filename.isEmpty()) { |
| 227 return false; | 234 return false; |
| 228 } | 235 } |
| 229 if (target->getCanvas() && | 236 if (target->getCanvas() && |
| 230 kUnknown_SkColorType == target->getCanvas()->imageInfo().colorType()) { | 237 kUnknown_SkColorType == target->getCanvas()->imageInfo().colorType()) { |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 269 return 1; | 276 return 1; |
| 270 } | 277 } |
| 271 if (loops > FLAGS_maxLoops) { | 278 if (loops > FLAGS_maxLoops) { |
| 272 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loops, FLAGS_maxLoops); | 279 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loops, FLAGS_maxLoops); |
| 273 return FLAGS_maxLoops; | 280 return FLAGS_maxLoops; |
| 274 } | 281 } |
| 275 return loops; | 282 return loops; |
| 276 } | 283 } |
| 277 | 284 |
| 278 static double now_ms() { return SkTime::GetNSecs() * 1e-6; } | 285 static double now_ms() { return SkTime::GetNSecs() * 1e-6; } |
| 279 static double time(int loops, Benchmark* bench, GPUTarget* target) { | 286 |
| 287 struct TimingThread { | |
| 288 TimingThread(SkGLContext* mainContext) | |
| 289 : fFenceSync(mainContext->fenceSync()) | |
| 290 , fMainContext(mainContext) | |
| 291 , fDone(false) {} | |
| 292 | |
| 293 static void Loop(void* data) { | |
| 294 TimingThread* timingThread = reinterpret_cast<TimingThread*>(data); | |
| 295 timingThread->timingLoop(); | |
| 296 } | |
| 297 | |
| 298 void timingLoop() { | |
| 299 // Create a context which shares display lists with the main thread | |
| 300 SkAutoTDelete<SkGLContext> glContext(SkCreatePlatformGLContext(kNone_GrGLStandard, | |
| 301 fMainContext)); | |
| 302 glContext->makeCurrent(); | |
| 303 | |
| 304 // Basic timing methodology is: | |
| 305 // 1) Wait on semaphore until main thread indicates its time to start timing the frame | |
| 306 // 2) Wait on frame start sync, record time. This is start of the frame. | |
| 307 // 3) Wait on semaphore until main thread indicates its time to finish timing the frame | |
| 308 // 4) Wait on frame end sync, record time. FrameEndTime - FrameStartTime = frame time | |
| 309 // 5) Wait on semaphore until main thread indicates we should time the next frame or quit | |
| 310 while (true) { | |
| 311 fSemaphore.wait(); | |
| 312 | |
| 313 // get start sync | |
| 314 SkPlatformGpuFence startSync = this->popStartSync(); | |
| 315 | |
| 316 // wait on sync | |
| 317 fFenceSync->flushAndWaitFence(startSync); | |
|
bsalomon
2016/01/20 18:54:52
Assume in light of other CL, this will not flush?
joshualitt
2016/01/20 21:14:52
Acknowledged.
| |
| 318 double start = kilobench::now_ms(); | |
| 319 | |
| 320 // do we want to sleep here? | |
| 321 // wait for end sync | |
| 322 fSemaphore.wait(); | |
| 323 | |
| 324 // get end sync | |
| 325 SkPlatformGpuFence endSync = this->popEndSync(); | |
| 326 | |
| 327 // wait on sync | |
| 328 fFenceSync->flushAndWaitFence(endSync); | |
| 329 double elapsed = kilobench::now_ms() - start; | |
| 330 | |
| 331 // No mutex needed, client won't touch timings until we're done | |
| 332 fTimings.push_back(elapsed); | |
| 333 | |
| 334 // clean up fences | |
| 335 fFenceSync->deleteFence(startSync); | |
| 336 fFenceSync->deleteFence(endSync); | |
| 337 | |
| 338 fSemaphore.wait(); | |
| 339 if (this->isDone()) { | |
| 340 break; | |
| 341 } | |
| 342 } | |
| 343 } | |
| 344 | |
| 345 void pushStartSync() { this->pushSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); } | |
| 346 | |
| 347 SkPlatformGpuFence popStartSync() { | |
| 348 return this->popSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); | |
| 349 } | |
| 350 | |
| 351 void pushEndSync() { this->pushSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); } | |
| 352 | |
| 353 SkPlatformGpuFence popEndSync() { return this->popSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); } | |
| 354 | |
| 355 void setDone() { | |
| 356 SkAutoMutexAcquire done(fDoneMutex); | |
| 357 fDone = true; | |
| 358 fSemaphore.signal(); | |
| 359 } | |
| 360 | |
| 361 typedef SkTLList<SkPlatformGpuFence, 1> SyncQueue; | |
| 362 | |
| 363 void pushSync(SyncQueue* queue, SkMutex* mutex) { | |
| 364 SkAutoMutexAcquire am(mutex); | |
| 365 *queue->addToHead() = fFenceSync->insertFence(); | |
| 366 fSemaphore.signal(); | |
| 367 } | |
| 368 | |
| 369 SkPlatformGpuFence popSync(SyncQueue* queue, SkMutex* mutex) { | |
| 370 SkAutoMutexAcquire am(mutex); | |
| 371 SkPlatformGpuFence sync = *queue->head(); | |
| 372 queue->popHead(); | |
| 373 return sync; | |
| 374 } | |
| 375 | |
| 376 bool isDone() { | |
| 377 SkAutoMutexAcquire am1(fFrameStartSyncsMutex); | |
| 378 SkAutoMutexAcquire done(fDoneMutex); | |
| 379 if (fDone && fFrameStartSyncs.isEmpty()) { | |
| 380 return true; | |
| 381 } else { | |
| 382 return false; | |
| 383 } | |
| 384 } | |
| 385 | |
| 386 const SkTArray<double>& timings() const { SkASSERT(fDone); return fTimings; } | |
| 387 | |
| 388 private: | |
| 389 SkGpuFenceSync* fFenceSync; | |
| 390 SkSemaphore fSemaphore; | |
| 391 SkMutex fFrameStartSyncsMutex; | |
| 392 SyncQueue fFrameStartSyncs; | |
| 393 SkMutex fFrameEndSyncsMutex; | |
| 394 SyncQueue fFrameEndSyncs; | |
| 395 SkTArray<double> fTimings; | |
| 396 SkMutex fDoneMutex; | |
| 397 SkGLContext* fMainContext; | |
| 398 bool fDone; | |
| 399 }; | |
| 400 | |
| 401 static double time(int loops, Benchmark* bench, GPUTarget* target, TimingThread* timingThread) { | |
| 280 SkCanvas* canvas = target->getCanvas(); | 402 SkCanvas* canvas = target->getCanvas(); |
| 281 if (canvas) { | 403 canvas->clear(SK_ColorWHITE); |
| 282 canvas->clear(SK_ColorWHITE); | 404 bench->preDraw(canvas); |
| 405 | |
| 406 if (timingThread) { | |
| 407 timingThread->pushStartSync(); | |
| 283 } | 408 } |
| 284 bench->preDraw(canvas); | |
| 285 double start = now_ms(); | 409 double start = now_ms(); |
| 286 canvas = target->beginTiming(canvas); | 410 canvas = target->beginTiming(canvas); |
| 287 bench->draw(loops, canvas); | 411 bench->draw(loops, canvas); |
| 288 if (canvas) { | 412 canvas->flush(); |
| 289 canvas->flush(); | 413 target->endTiming(timingThread ? true : false); |
| 414 | |
| 415 double elapsed = now_ms() - start; | |
| 416 if (timingThread) { | |
| 417 timingThread->pushEndSync(); | |
| 418 timingThread->setDone(); | |
| 290 } | 419 } |
| 291 target->endTiming(); | |
| 292 double elapsed = now_ms() - start; | |
| 293 bench->postDraw(canvas); | 420 bench->postDraw(canvas); |
| 294 return elapsed; | 421 return elapsed; |
| 295 } | 422 } |
| 296 | 423 |
| 424 // TODO For now we don't use the background timing thread to tune loops | |
| 297 static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameLag) { | 425 static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameLag) { |
| 298 // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. | 426 // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. |
| 299 int loops = bench->calculateLoops(FLAGS_loops); | 427 int loops = bench->calculateLoops(FLAGS_loops); |
| 300 if (kAutoTuneLoops == loops) { | 428 if (kAutoTuneLoops == loops) { |
| 301 loops = 1; | 429 loops = 1; |
| 302 double elapsed = 0; | 430 double elapsed = 0; |
| 303 do { | 431 do { |
| 304 if (1<<30 == loops) { | 432 if (1<<30 == loops) { |
| 305 // We're about to wrap. Something's wrong with the bench. | 433 // We're about to wrap. Something's wrong with the bench. |
| 306 loops = 0; | 434 loops = 0; |
| 307 break; | 435 break; |
| 308 } | 436 } |
| 309 loops *= 2; | 437 loops *= 2; |
| 310 // If the GPU lets frames lag at all, we need to make sure we're timing | 438 // If the GPU lets frames lag at all, we need to make sure we're timing |
| 311 // _this_ round, not still timing last round. | 439 // _this_ round, not still timing last round. |
| 312 for (int i = 0; i < maxGpuFrameLag; i++) { | 440 for (int i = 0; i < maxGpuFrameLag; i++) { |
| 313 elapsed = time(loops, bench, target); | 441 elapsed = time(loops, bench, target, nullptr); |
| 314 } | 442 } |
| 315 } while (elapsed < FLAGS_gpuMs); | 443 } while (elapsed < FLAGS_gpuMs); |
| 316 | 444 |
| 317 // We've overshot at least a little. Scale back linearly. | 445 // We've overshot at least a little. Scale back linearly. |
| 318 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); | 446 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); |
| 319 loops = clamp_loops(loops); | 447 loops = clamp_loops(loops); |
| 320 | 448 |
| 321 // Make sure we're not still timing our calibration. | 449 // Make sure we're not still timing our calibration. |
| 322 target->fence(); | 450 target->fence(); |
| 323 } else { | 451 } else { |
| 324 loops = detect_forever_loops(loops); | 452 loops = detect_forever_loops(loops); |
| 325 } | 453 } |
| 326 | 454 |
| 327 // Pretty much the same deal as the calibration: do some warmup to make | 455 // Pretty much the same deal as the calibration: do some warmup to make |
| 328 // sure we're timing steady-state pipelined frames. | 456 // sure we're timing steady-state pipelined frames. |
| 329 for (int i = 0; i < maxGpuFrameLag - 1; i++) { | 457 for (int i = 0; i < maxGpuFrameLag - 1; i++) { |
| 330 time(loops, bench, target); | 458 time(loops, bench, target, nullptr); |
| 331 } | 459 } |
| 332 | 460 |
| 333 return loops; | 461 return loops; |
| 334 } | 462 } |
| 335 | 463 |
| 336 struct AutoSetupContextBenchAndTarget { | 464 struct AutoSetupContextBenchAndTarget { |
| 337 AutoSetupContextBenchAndTarget(Benchmark* bench) : fBenchmark(bench) { | 465 AutoSetupContextBenchAndTarget(Benchmark* bench) : fBenchmark(bench) { |
| 338 GrContextOptions grContextOpts; | 466 GrContextOptions grContextOpts; |
| 339 fCtxFactory.reset(new GrContextFactory(grContextOpts)); | 467 fCtxFactory.reset(new GrContextFactory(grContextOpts)); |
| 340 | 468 |
| 341 SkAssertResult(fTarget.init(bench, fCtxFactory, false, | 469 SkAssertResult(fTarget.init(bench, fCtxFactory, false, |
| 342 GrContextFactory::kNative_GLContextType, | 470 GrContextFactory::kNative_GLContextType, |
| 343 GrContextFactory::kNone_GLContextOptions, 0)); | 471 GrContextFactory::kNone_GLContextOptions, 0)); |
| 344 | 472 |
| 345 fCanvas = fTarget.getCanvas(); | 473 fCanvas = fTarget.getCanvas(); |
| 346 fTarget.setup(); | 474 fTarget.setup(); |
| 347 | 475 |
| 348 bench->perCanvasPreDraw(fCanvas); | 476 bench->perCanvasPreDraw(fCanvas); |
| 349 fTarget.needsFrameTiming(&fMaxFrameLag); | 477 fTarget.needsFrameTiming(&fMaxFrameLag); |
| 350 } | 478 } |
| 351 | 479 |
| 352 int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); } | 480 int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); } |
| 353 | 481 |
| 354 double timeSample(int loops) { | 482 double timeSample(int loops, TimingThread* timingThread) { |
| 355 for (int i = 0; i < fMaxFrameLag; i++) { | 483 for (int i = 0; i < fMaxFrameLag; i++) { |
| 356 time(loops, fBenchmark, &fTarget); | 484 time(loops, fBenchmark, &fTarget, timingThread); |
| 357 } | 485 } |
| 358 | 486 |
| 359 return time(loops, fBenchmark, &fTarget) / loops; | 487 return time(loops, fBenchmark, &fTarget, timingThread) / loops; |
| 360 } | 488 } |
| 489 | |
| 361 void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); } | 490 void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); } |
| 362 | 491 |
| 363 SkAutoTDelete<GrContextFactory> fCtxFactory; | 492 SkAutoTDelete<GrContextFactory> fCtxFactory; |
| 364 GPUTarget fTarget; | 493 GPUTarget fTarget; |
| 365 SkCanvas* fCanvas; | 494 SkCanvas* fCanvas; |
| 366 Benchmark* fBenchmark; | 495 Benchmark* fBenchmark; |
| 367 int fMaxFrameLag; | 496 int fMaxFrameLag; |
| 368 }; | 497 }; |
| 369 | 498 |
| 370 int setup_loops(Benchmark* bench) { | 499 int setup_loops(Benchmark* bench) { |
| 371 AutoSetupContextBenchAndTarget ascbt(bench); | 500 AutoSetupContextBenchAndTarget ascbt(bench); |
| 372 int loops = ascbt.getLoops(); | 501 int loops = ascbt.getLoops(); |
| 373 ascbt.teardownBench(); | 502 ascbt.teardownBench(); |
| 374 | 503 |
| 375 if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) { | 504 if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) { |
| 376 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu"); | 505 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu"); |
| 377 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName()); | 506 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName()); |
| 378 pngFilename.append(".png"); | 507 pngFilename.append(".png"); |
| 379 write_canvas_png(&ascbt.fTarget, pngFilename); | 508 write_canvas_png(&ascbt.fTarget, pngFilename); |
| 380 } | 509 } |
| 381 return loops; | 510 return loops; |
| 382 } | 511 } |
| 383 | 512 |
| 384 double time_sample(Benchmark* bench, int loops) { | 513 double time_sample(Benchmark* bench, int loops) { |
| 385 AutoSetupContextBenchAndTarget ascbt(bench); | 514 AutoSetupContextBenchAndTarget ascbt(bench); |
| 386 double sample = ascbt.timeSample(loops); | 515 |
| 516 double sample; | |
| 517 if (FLAGS_useBackgroundThread) { | |
| 518 TimingThread timingThread(ascbt.fTarget.gl()); | |
| 519 SkAutoTDelete<SkThread> nativeThread(new SkThread(TimingThread::Loop, &timingThread)); | |
| 520 nativeThread->start(); | |
| 521 sample = ascbt.timeSample(loops, &timingThread); | |
| 522 nativeThread->join(); | |
| 523 | |
| 524 // TODO get these times out of here | |
| 525 for (int i = 0; i < timingThread.timings().count(); i++) { | |
| 526 SkDebugf("gpu times %s\n", HUMANIZE(timingThread.timings()[i])); | |
| 527 } | |
| 528 } else { | |
| 529 sample = ascbt.timeSample(loops, nullptr); | |
| 530 } | |
| 531 | |
| 387 ascbt.teardownBench(); | 532 ascbt.teardownBench(); |
| 388 | 533 |
| 389 return sample; | 534 return sample; |
| 390 } | 535 } |
| 391 | 536 |
| 392 } // namespace kilobench | 537 } // namespace kilobench |
| 393 | 538 |
| 394 static const int kOutResultSize = 1024; | 539 static const int kOutResultSize = 1024; |
| 395 | 540 |
| 396 int kilobench_main() { | 541 int kilobench_main() { |
| 397 kilobench::BenchmarkStream benchStream; | 542 kilobench::BenchmarkStream benchStream; |
| 398 | 543 |
| 399 SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", | 544 SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", |
| 400 FLAGS_samples, "samples"); | 545 FLAGS_samples, "samples"); |
| 401 | 546 |
| 402 int descriptors[2]; | 547 int descriptors[2]; |
| 403 if (pipe(descriptors) != 0) { | 548 if (pipe(descriptors) != 0) { |
| 404 SkFAIL("Failed to open a pipe\n"); | 549 SkFAIL("Failed to open a pipe\n"); |
| 405 } | 550 } |
| 406 | 551 |
| 407 while (Benchmark* b = benchStream.next()) { | 552 while (Benchmark* b = benchStream.next()) { |
| 408 SkAutoTDelete<Benchmark> bench(b); | 553 SkAutoTDelete<Benchmark> bench(b); |
| 409 | 554 |
| 410 int loops; | 555 int loops = 1; |
| 411 SkTArray<double> samples; | 556 SkTArray<double> samples; |
| 412 for (int i = 0; i < FLAGS_samples + 1; i++) { | 557 for (int i = 0; i < FLAGS_samples + 1; i++) { |
| 413 // We fork off a new process to setup the grcontext and run the test while we wait | 558 // We fork off a new process to setup the grcontext and run the test while we wait |
| 414 int childPid = fork(); | 559 if (FLAGS_useMultiProcess) { |
| 415 if (childPid > 0) { | 560 int childPid = fork(); |
| 416 char result[kOutResultSize]; | 561 if (childPid > 0) { |
| 417 if (read(descriptors[0], result, kOutResultSize) < 0) { | 562 char result[kOutResultSize]; |
| 418 SkFAIL("Failed to read from pipe\n"); | 563 if (read(descriptors[0], result, kOutResultSize) < 0) { |
| 564 SkFAIL("Failed to read from pipe\n"); | |
| 565 } | |
| 566 | |
| 567 // if samples == 0 then parse # of loops | |
| 568 // else parse float | |
| 569 if (i == 0) { | |
| 570 sscanf(result, "%d", &loops); | |
| 571 } else { | |
| 572 sscanf(result, "%lf", &samples.push_back()); | |
| 573 } | |
| 574 | |
| 575 // wait until exit | |
| 576 int status; | |
| 577 waitpid(childPid, &status, 0); | |
| 578 } else if (0 == childPid) { | |
| 579 char result[kOutResultSize]; | |
| 580 if (i == 0) { | |
| 581 sprintf(result, "%d", kilobench::setup_loops(bench)); | |
| 582 } else { | |
| 583 sprintf(result, "%lf", kilobench::time_sample(bench, loops)); | |
| 584 } | |
| 585 | |
| 586 // Make sure to write the null terminator | |
| 587 if (write(descriptors[1], result, strlen(result) + 1) < 0) { | |
| 588 SkFAIL("Failed to write to pipe\n"); | |
| 589 } | |
| 590 return 0; | |
| 591 } else { | |
| 592 SkFAIL("Fork failed\n"); | |
| 419 } | 593 } |
| 420 | 594 } else { |
| 421 // if samples == 0 then parse # of loops | |
| 422 // else parse float | |
| 423 if (i == 0) { | 595 if (i == 0) { |
| 424 sscanf(result, "%d", &loops); | 596 loops = kilobench::setup_loops(bench); |
| 425 } else { | 597 } else { |
| 426 sscanf(result, "%lf", &samples.push_back()); | 598 samples.push_back() = kilobench::time_sample(bench, loops); |
| 427 } | 599 } |
| 428 | |
| 429 // wait until exit | |
| 430 int status; | |
| 431 waitpid(childPid, &status, 0); | |
| 432 } else if (0 == childPid) { | |
| 433 char result[kOutResultSize]; | |
| 434 if (i == 0) { | |
| 435 sprintf(result, "%d", kilobench::setup_loops(bench)); | |
| 436 } else { | |
| 437 sprintf(result, "%lf", kilobench::time_sample(bench, loops)); | |
| 438 } | |
| 439 | |
| 440 // Make sure to write the null terminator | |
| 441 if (write(descriptors[1], result, strlen(result) + 1) < 0) { | |
| 442 SkFAIL("Failed to write to pipe\n"); | |
| 443 } | |
| 444 return 0; | |
| 445 } else { | |
| 446 SkFAIL("Fork failed\n"); | |
| 447 } | 600 } |
| 448 } | 601 } |
| 449 | 602 |
| 450 Stats stats(samples); | 603 Stats stats(samples); |
| 451 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; | 604 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; |
| 452 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" | 605 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" |
| 453 , loops | 606 , loops |
| 454 , HUMANIZE(stats.min) | 607 , HUMANIZE(stats.min) |
| 455 , HUMANIZE(stats.median) | 608 , HUMANIZE(stats.median) |
| 456 , HUMANIZE(stats.mean) | 609 , HUMANIZE(stats.mean) |
| 457 , HUMANIZE(stats.max) | 610 , HUMANIZE(stats.max) |
| 458 , stddev_percent | 611 , stddev_percent |
| 459 , stats.plot.c_str() | 612 , stats.plot.c_str() |
| 460 , "gpu" | 613 , "gpu" |
| 461 , bench->getUniqueName() | 614 , bench->getUniqueName() |
| 462 ); | 615 ); |
| 463 | 616 |
| 464 } | 617 } |
| 465 return 0; | 618 return 0; |
| 466 } | 619 } |
| 467 | 620 |
| 468 #if !defined SK_BUILD_FOR_IOS | 621 #if !defined SK_BUILD_FOR_IOS |
| 469 int main(int argc, char** argv) { | 622 int main(int argc, char** argv) { |
| 470 SkCommandLineFlags::Parse(argc, argv); | 623 SkCommandLineFlags::Parse(argc, argv); |
| 471 return kilobench_main(); | 624 return kilobench_main(); |
| 472 } | 625 } |
| 473 #endif | 626 #endif |
| OLD | NEW |