| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright 2016 Google Inc. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license that can be | |
| 5 * found in the LICENSE file. | |
| 6 */ | |
| 7 | |
| 8 #include "GrCaps.h" | |
| 9 #include "GrContextFactory.h" | |
| 10 #include "Benchmark.h" | |
| 11 #include "ResultsWriter.h" | |
| 12 #include "SkCommandLineFlags.h" | |
| 13 #include "SkOSFile.h" | |
| 14 #include "SkStream.h" | |
| 15 #include "SkSurface.h" | |
| 16 #include "SkTime.h" | |
| 17 #include "SkTLList.h" | |
| 18 #include "SkThreadUtils.h" | |
| 19 #include "Stats.h" | |
| 20 #include "Timer.h" | |
| 21 #include "VisualSKPBench.h" | |
| 22 #include "gl/GrGLDefines.h" | |
| 23 #include "gl/GrGLUtil.h" | |
| 24 #include "../private/SkMutex.h" | |
| 25 #include "../private/SkSemaphore.h" | |
| 26 #include "../private/SkGpuFenceSync.h" | |
| 27 | |
| 28 // posix only for now | |
| 29 #include <unistd.h> | |
| 30 #include <sys/types.h> | |
| 31 #include <sys/wait.h> | |
| 32 | |
| 33 using namespace sk_gpu_test; | |
| 34 | |
/*
 * This is an experimental GPU-only benchmarking program. The initial implementation will only
 * support SKPs.
 */
| 39 | |
// Loop-count value that requests per-bench auto-tuning (see the --loops help text).
static const int kAutoTuneLoops = 0;

// Default for --loops: a single loop in debug builds, auto-tuning otherwise.
#ifdef SK_DEBUG
static const int kDefaultLoops = 1;
#else
static const int kDefaultLoops = kAutoTuneLoops;
#endif
| 48 | |
| 49 static SkString loops_help_txt() { | |
| 50 SkString help; | |
| 51 help.printf("Number of times to run each bench. Set this to %d to auto-" | |
| 52 "tune for each bench. Timings are only reported when auto-tuning
.", | |
| 53 kAutoTuneLoops); | |
| 54 return help; | |
| 55 } | |
| 56 | |
| 57 DEFINE_string(skps, "skps", "Directory to read skps from."); | |
| 58 DEFINE_string2(match, m, nullptr, | |
| 59 "[~][^]substring[$] [...] of GM name to run.\n" | |
| 60 "Multiple matches may be separated by spaces.\n" | |
| 61 "~ causes a matching bench to always be skipped\n" | |
| 62 "^ requires the start of the bench to match\n" | |
| 63 "$ requires the end of the bench to match\n" | |
| 64 "^ and $ requires an exact match\n" | |
| 65 "If a bench does not match any list entry,\n" | |
| 66 "it is skipped unless some list entry starts with ~"); | |
| 67 DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU
allows to lag."); | |
| 68 DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); | |
| 69 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); | |
| 70 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); | |
| 71 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); | |
| 72 DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs."); | |
| 73 DEFINE_bool(useBackgroundThread, true, "If false, kilobench will time cpu / gpu
work together"); | |
| 74 DEFINE_bool(useMultiProcess, true, "If false, kilobench will run all tests in on
e process"); | |
| 75 | |
| 76 static SkString humanize(double ms) { | |
| 77 return HumanizeMs(ms); | |
| 78 } | |
| 79 #define HUMANIZE(ms) humanize(ms).c_str() | |
| 80 | |
| 81 namespace kilobench { | |
| 82 class BenchmarkStream { | |
| 83 public: | |
| 84 BenchmarkStream() : fCurrentSKP(0) { | |
| 85 for (int i = 0; i < FLAGS_skps.count(); i++) { | |
| 86 if (SkStrEndsWith(FLAGS_skps[i], ".skp")) { | |
| 87 fSKPs.push_back() = FLAGS_skps[i]; | |
| 88 } else { | |
| 89 SkOSFile::Iter it(FLAGS_skps[i], ".skp"); | |
| 90 SkString path; | |
| 91 while (it.next(&path)) { | |
| 92 fSKPs.push_back() = SkOSPath::Join(FLAGS_skps[0], path.c_str
()); | |
| 93 } | |
| 94 } | |
| 95 } | |
| 96 } | |
| 97 | |
| 98 Benchmark* next() { | |
| 99 Benchmark* bench = nullptr; | |
| 100 // skips non matching benches | |
| 101 while ((bench = this->innerNext()) && | |
| 102 (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getUniqueName
()) || | |
| 103 !bench->isSuitableFor(Benchmark::kGPU_Backend))) { | |
| 104 delete bench; | |
| 105 } | |
| 106 return bench; | |
| 107 } | |
| 108 | |
| 109 private: | |
| 110 static sk_sp<SkPicture> ReadPicture(const char path[]) { | |
| 111 // Not strictly necessary, as it will be checked again later, | |
| 112 // but helps to avoid a lot of pointless work if we're going to skip it. | |
| 113 if (SkCommandLineFlags::ShouldSkip(FLAGS_match, path)) { | |
| 114 return nullptr; | |
| 115 } | |
| 116 | |
| 117 SkAutoTDelete<SkStream> stream(SkStream::NewFromFile(path)); | |
| 118 if (stream.get() == nullptr) { | |
| 119 SkDebugf("Could not read %s.\n", path); | |
| 120 return nullptr; | |
| 121 } | |
| 122 | |
| 123 return SkPicture::MakeFromStream(stream.get()); | |
| 124 } | |
| 125 | |
| 126 Benchmark* innerNext() { | |
| 127 // Render skps | |
| 128 while (fCurrentSKP < fSKPs.count()) { | |
| 129 const SkString& path = fSKPs[fCurrentSKP++]; | |
| 130 auto pic = ReadPicture(path.c_str()); | |
| 131 if (!pic) { | |
| 132 continue; | |
| 133 } | |
| 134 | |
| 135 SkString name = SkOSPath::Basename(path.c_str()); | |
| 136 return new VisualSKPBench(name.c_str(), pic.get()); | |
| 137 } | |
| 138 | |
| 139 return nullptr; | |
| 140 } | |
| 141 | |
| 142 SkTArray<SkString> fSKPs; | |
| 143 int fCurrentSKP; | |
| 144 }; | |
| 145 | |
| 146 struct GPUTarget { | |
| 147 void setup() { | |
| 148 fGL->makeCurrent(); | |
| 149 // Make sure we're done with whatever came before. | |
| 150 GR_GL_CALL(fGL->gl(), Finish()); | |
| 151 } | |
| 152 | |
| 153 SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; } | |
| 154 | |
| 155 void endTiming(bool usePlatformSwapBuffers) { | |
| 156 if (fGL) { | |
| 157 GR_GL_CALL(fGL->gl(), Flush()); | |
| 158 if (usePlatformSwapBuffers) { | |
| 159 fGL->swapBuffers(); | |
| 160 } else { | |
| 161 fGL->waitOnSyncOrSwap(); | |
| 162 } | |
| 163 } | |
| 164 } | |
| 165 void finish() { | |
| 166 GR_GL_CALL(fGL->gl(), Finish()); | |
| 167 } | |
| 168 | |
| 169 bool needsFrameTiming(int* maxFrameLag) const { | |
| 170 if (!fGL->getMaxGpuFrameLag(maxFrameLag)) { | |
| 171 // Frame lag is unknown. | |
| 172 *maxFrameLag = FLAGS_gpuFrameLag; | |
| 173 } | |
| 174 return true; | |
| 175 } | |
| 176 | |
| 177 bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText, | |
| 178 GrContextFactory::ContextType ctxType, | |
| 179 GrContextFactory::ContextOptions ctxOptions, int numSamples) { | |
| 180 GrContext* context = factory->get(ctxType, ctxOptions); | |
| 181 int maxRTSize = context->caps()->maxRenderTargetSize(); | |
| 182 SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSi
ze), | |
| 183 SkTMin(bench->getSize().fY, maxRTSi
ze), | |
| 184 kN32_SkColorType, kPremul_SkAlphaT
ype); | |
| 185 uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_
Flag : | |
| 186 0; | |
| 187 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType); | |
| 188 fSurface.reset(SkSurface::MakeRenderTarget(context, | |
| 189 SkBudgeted::kNo, info, | |
| 190 numSamples, &props).release()
); | |
| 191 fGL = factory->getContextInfo(ctxType, ctxOptions).glContext(); | |
| 192 if (!fSurface.get()) { | |
| 193 return false; | |
| 194 } | |
| 195 | |
| 196 // Kilobench should only be used on platforms with fence sync support | |
| 197 SkASSERT(fGL->fenceSyncSupport()); | |
| 198 return true; | |
| 199 } | |
| 200 | |
| 201 SkCanvas* getCanvas() const { | |
| 202 if (!fSurface.get()) { | |
| 203 return nullptr; | |
| 204 } | |
| 205 return fSurface->getCanvas(); | |
| 206 } | |
| 207 | |
| 208 bool capturePixels(SkBitmap* bmp) { | |
| 209 SkCanvas* canvas = this->getCanvas(); | |
| 210 if (!canvas) { | |
| 211 return false; | |
| 212 } | |
| 213 bmp->setInfo(canvas->imageInfo()); | |
| 214 if (!canvas->readPixels(bmp, 0, 0)) { | |
| 215 SkDebugf("Can't read canvas pixels.\n"); | |
| 216 return false; | |
| 217 } | |
| 218 return true; | |
| 219 } | |
| 220 | |
| 221 GLTestContext* gl() { return fGL; } | |
| 222 | |
| 223 private: | |
| 224 GLTestContext* fGL; | |
| 225 SkAutoTDelete<SkSurface> fSurface; | |
| 226 }; | |
| 227 | |
| 228 static bool write_canvas_png(GPUTarget* target, const SkString& filename) { | |
| 229 | |
| 230 if (filename.isEmpty()) { | |
| 231 return false; | |
| 232 } | |
| 233 if (target->getCanvas() && | |
| 234 kUnknown_SkColorType == target->getCanvas()->imageInfo().colorType()) { | |
| 235 return false; | |
| 236 } | |
| 237 | |
| 238 SkBitmap bmp; | |
| 239 | |
| 240 if (!target->capturePixels(&bmp)) { | |
| 241 return false; | |
| 242 } | |
| 243 | |
| 244 SkString dir = SkOSPath::Dirname(filename.c_str()); | |
| 245 if (!sk_mkdir(dir.c_str())) { | |
| 246 SkDebugf("Can't make dir %s.\n", dir.c_str()); | |
| 247 return false; | |
| 248 } | |
| 249 SkFILEWStream stream(filename.c_str()); | |
| 250 if (!stream.isValid()) { | |
| 251 SkDebugf("Can't write %s.\n", filename.c_str()); | |
| 252 return false; | |
| 253 } | |
| 254 if (!SkImageEncoder::EncodeStream(&stream, bmp, SkImageEncoder::kPNG_Type, 1
00)) { | |
| 255 SkDebugf("Can't encode a PNG.\n"); | |
| 256 return false; | |
| 257 } | |
| 258 return true; | |
| 259 } | |
| 260 | |
| 261 static int detect_forever_loops(int loops) { | |
| 262 // look for a magic run-forever value | |
| 263 if (loops < 0) { | |
| 264 loops = SK_MaxS32; | |
| 265 } | |
| 266 return loops; | |
| 267 } | |
| 268 | |
| 269 static int clamp_loops(int loops) { | |
| 270 if (loops < 1) { | |
| 271 SkDebugf("ERROR: clamping loops from %d to 1. " | |
| 272 "There's probably something wrong with the bench.\n", loops); | |
| 273 return 1; | |
| 274 } | |
| 275 if (loops > FLAGS_maxLoops) { | |
| 276 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loo
ps, FLAGS_maxLoops); | |
| 277 return FLAGS_maxLoops; | |
| 278 } | |
| 279 return loops; | |
| 280 } | |
| 281 | |
| 282 static double now_ms() { return SkTime::GetNSecs() * 1e-6; } | |
| 283 | |
| 284 struct TimingThread { | |
| 285 TimingThread(GLTestContext* mainContext) | |
| 286 : fFenceSync(mainContext->fenceSync()) | |
| 287 , fMainContext(mainContext) | |
| 288 , fDone(false) {} | |
| 289 | |
| 290 static void Loop(void* data) { | |
| 291 TimingThread* timingThread = reinterpret_cast<TimingThread*>(data); | |
| 292 timingThread->timingLoop(); | |
| 293 } | |
| 294 | |
| 295 // To ensure waiting for the sync actually does something, we check to make
sure the we exceed | |
| 296 // some small value | |
| 297 const double kMinElapsed = 1e-6; | |
| 298 bool sanity(double start) const { | |
| 299 double elapsed = now_ms() - start; | |
| 300 return elapsed > kMinElapsed; | |
| 301 } | |
| 302 | |
| 303 void waitFence(SkPlatformGpuFence sync) { | |
| 304 SkDEBUGCODE(double start = now_ms()); | |
| 305 fFenceSync->waitFence(sync); | |
| 306 SkASSERT(sanity(start)); | |
| 307 } | |
| 308 | |
| 309 void timingLoop() { | |
| 310 // Create a context which shares display lists with the main thread | |
| 311 SkAutoTDelete<GLTestContext> glContext(CreatePlatformGLTestContext(kNone
_GrGLStandard, | |
| 312 fMain
Context)); | |
| 313 glContext->makeCurrent(); | |
| 314 | |
| 315 // Basic timing methodology is: | |
| 316 // 1) Wait on semaphore until main thread indicates its time to start ti
ming the frame | |
| 317 // 2) Wait on frame start sync, record time. This is start of the frame
. | |
| 318 // 3) Wait on semaphore until main thread indicates its time to finish t
iming the frame | |
| 319 // 4) Wait on frame end sync, record time. FrameEndTime - FrameStartTim
e = frame time | |
| 320 // 5) Wait on semaphore until main thread indicates we should time the n
ext frame or quit | |
| 321 while (true) { | |
| 322 fSemaphore.wait(); | |
| 323 | |
| 324 // get start sync | |
| 325 SkPlatformGpuFence startSync = this->popStartSync(); | |
| 326 | |
| 327 // wait on sync | |
| 328 this->waitFence(startSync); | |
| 329 double start = kilobench::now_ms(); | |
| 330 | |
| 331 // do we want to sleep here? | |
| 332 // wait for end sync | |
| 333 fSemaphore.wait(); | |
| 334 | |
| 335 // get end sync | |
| 336 SkPlatformGpuFence endSync = this->popEndSync(); | |
| 337 | |
| 338 // wait on sync | |
| 339 this->waitFence(endSync); | |
| 340 double elapsed = kilobench::now_ms() - start; | |
| 341 | |
| 342 // No mutex needed, client won't touch timings until we're done | |
| 343 fTimings.push_back(elapsed); | |
| 344 | |
| 345 // clean up fences | |
| 346 fFenceSync->deleteFence(startSync); | |
| 347 fFenceSync->deleteFence(endSync); | |
| 348 | |
| 349 fSemaphore.wait(); | |
| 350 if (this->isDone()) { | |
| 351 break; | |
| 352 } | |
| 353 } | |
| 354 } | |
| 355 | |
| 356 void pushStartSync() { this->pushSync(&fFrameStartSyncs, &fFrameStartSyncsMu
tex); } | |
| 357 | |
| 358 SkPlatformGpuFence popStartSync() { | |
| 359 return this->popSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); | |
| 360 } | |
| 361 | |
| 362 void pushEndSync() { this->pushSync(&fFrameEndSyncs, &fFrameEndSyncsMutex);
} | |
| 363 | |
| 364 SkPlatformGpuFence popEndSync() { return this->popSync(&fFrameEndSyncs, &fFr
ameEndSyncsMutex); } | |
| 365 | |
| 366 void setDone() { | |
| 367 SkAutoMutexAcquire done(fDoneMutex); | |
| 368 fDone = true; | |
| 369 fSemaphore.signal(); | |
| 370 } | |
| 371 | |
| 372 typedef SkTLList<SkPlatformGpuFence, 1> SyncQueue; | |
| 373 | |
| 374 void pushSync(SyncQueue* queue, SkMutex* mutex) { | |
| 375 SkAutoMutexAcquire am(mutex); | |
| 376 *queue->addToHead() = fFenceSync->insertFence(); | |
| 377 fSemaphore.signal(); | |
| 378 } | |
| 379 | |
| 380 SkPlatformGpuFence popSync(SyncQueue* queue, SkMutex* mutex) { | |
| 381 SkAutoMutexAcquire am(mutex); | |
| 382 SkPlatformGpuFence sync = *queue->head(); | |
| 383 queue->popHead(); | |
| 384 return sync; | |
| 385 } | |
| 386 | |
| 387 bool isDone() { | |
| 388 SkAutoMutexAcquire am1(fFrameStartSyncsMutex); | |
| 389 SkAutoMutexAcquire done(fDoneMutex); | |
| 390 if (fDone && fFrameStartSyncs.isEmpty()) { | |
| 391 return true; | |
| 392 } else { | |
| 393 return false; | |
| 394 } | |
| 395 } | |
| 396 | |
| 397 const SkTArray<double>& timings() const { SkASSERT(fDone); return fTimings;
} | |
| 398 | |
| 399 private: | |
| 400 SkGpuFenceSync* fFenceSync; | |
| 401 SkSemaphore fSemaphore; | |
| 402 SkMutex fFrameStartSyncsMutex; | |
| 403 SyncQueue fFrameStartSyncs; | |
| 404 SkMutex fFrameEndSyncsMutex; | |
| 405 SyncQueue fFrameEndSyncs; | |
| 406 SkTArray<double> fTimings; | |
| 407 SkMutex fDoneMutex; | |
| 408 GLTestContext* fMainContext; | |
| 409 bool fDone; | |
| 410 }; | |
| 411 | |
| 412 static double time(int loops, Benchmark* bench, GPUTarget* target, TimingThread*
timingThread) { | |
| 413 SkCanvas* canvas = target->getCanvas(); | |
| 414 canvas->clear(SK_ColorWHITE); | |
| 415 bench->preDraw(canvas); | |
| 416 | |
| 417 if (timingThread) { | |
| 418 timingThread->pushStartSync(); | |
| 419 } | |
| 420 double start = now_ms(); | |
| 421 canvas = target->beginTiming(canvas); | |
| 422 bench->draw(loops, canvas); | |
| 423 canvas->flush(); | |
| 424 target->endTiming(timingThread ? true : false); | |
| 425 | |
| 426 double elapsed = now_ms() - start; | |
| 427 if (timingThread) { | |
| 428 timingThread->pushEndSync(); | |
| 429 timingThread->setDone(); | |
| 430 } | |
| 431 bench->postDraw(canvas); | |
| 432 return elapsed; | |
| 433 } | |
| 434 | |
| 435 // TODO For now we don't use the background timing thread to tune loops | |
| 436 static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameL
ag) { | |
| 437 // First, figure out how many loops it'll take to get a frame up to FLAGS_gp
uMs. | |
| 438 int loops = bench->calculateLoops(FLAGS_loops); | |
| 439 if (kAutoTuneLoops == loops) { | |
| 440 loops = 1; | |
| 441 double elapsed = 0; | |
| 442 do { | |
| 443 if (1<<30 == loops) { | |
| 444 // We're about to wrap. Something's wrong with the bench. | |
| 445 loops = 0; | |
| 446 break; | |
| 447 } | |
| 448 loops *= 2; | |
| 449 // If the GPU lets frames lag at all, we need to make sure we're tim
ing | |
| 450 // _this_ round, not still timing last round. | |
| 451 for (int i = 0; i < maxGpuFrameLag; i++) { | |
| 452 elapsed = time(loops, bench, target, nullptr); | |
| 453 } | |
| 454 } while (elapsed < FLAGS_gpuMs); | |
| 455 | |
| 456 // We've overshot at least a little. Scale back linearly. | |
| 457 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); | |
| 458 loops = clamp_loops(loops); | |
| 459 | |
| 460 // Make sure we're not still timing our calibration. | |
| 461 target->finish(); | |
| 462 } else { | |
| 463 loops = detect_forever_loops(loops); | |
| 464 } | |
| 465 | |
| 466 // Pretty much the same deal as the calibration: do some warmup to make | |
| 467 // sure we're timing steady-state pipelined frames. | |
| 468 for (int i = 0; i < maxGpuFrameLag - 1; i++) { | |
| 469 time(loops, bench, target, nullptr); | |
| 470 } | |
| 471 | |
| 472 return loops; | |
| 473 } | |
| 474 | |
| 475 struct AutoSetupContextBenchAndTarget { | |
| 476 AutoSetupContextBenchAndTarget(Benchmark* bench) : fBenchmark(bench) { | |
| 477 GrContextOptions grContextOpts; | |
| 478 fCtxFactory.reset(new GrContextFactory(grContextOpts)); | |
| 479 | |
| 480 SkAssertResult(fTarget.init(bench, fCtxFactory, false, | |
| 481 GrContextFactory::kNativeGL_ContextType, | |
| 482 GrContextFactory::kNone_ContextOptions, 0)); | |
| 483 | |
| 484 fCanvas = fTarget.getCanvas(); | |
| 485 fTarget.setup(); | |
| 486 | |
| 487 bench->perCanvasPreDraw(fCanvas); | |
| 488 fTarget.needsFrameTiming(&fMaxFrameLag); | |
| 489 } | |
| 490 | |
| 491 int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag);
} | |
| 492 | |
| 493 double timeSample(int loops, TimingThread* timingThread) { | |
| 494 for (int i = 0; i < fMaxFrameLag; i++) { | |
| 495 time(loops, fBenchmark, &fTarget, timingThread); | |
| 496 } | |
| 497 | |
| 498 return time(loops, fBenchmark, &fTarget, timingThread) / loops; | |
| 499 } | |
| 500 | |
| 501 void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); } | |
| 502 | |
| 503 SkAutoTDelete<GrContextFactory> fCtxFactory; | |
| 504 GPUTarget fTarget; | |
| 505 SkCanvas* fCanvas; | |
| 506 Benchmark* fBenchmark; | |
| 507 int fMaxFrameLag; | |
| 508 }; | |
| 509 | |
| 510 int setup_loops(Benchmark* bench) { | |
| 511 AutoSetupContextBenchAndTarget ascbt(bench); | |
| 512 int loops = ascbt.getLoops(); | |
| 513 ascbt.teardownBench(); | |
| 514 | |
| 515 if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) { | |
| 516 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu"); | |
| 517 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName()
); | |
| 518 pngFilename.append(".png"); | |
| 519 write_canvas_png(&ascbt.fTarget, pngFilename); | |
| 520 } | |
| 521 return loops; | |
| 522 } | |
| 523 | |
// One timing measurement for a bench, as filled in by time_sample().
// Both fields default to 0 so a Sample is never read uninitialized.
struct Sample {
    double fCpu = 0;  // per-loop time measured on the main thread
    double fGpu = 0;  // minimum frame time seen by the background timing thread, if used
};
| 528 | |
| 529 Sample time_sample(Benchmark* bench, int loops) { | |
| 530 AutoSetupContextBenchAndTarget ascbt(bench); | |
| 531 | |
| 532 Sample sample; | |
| 533 if (FLAGS_useBackgroundThread) { | |
| 534 TimingThread timingThread(ascbt.fTarget.gl()); | |
| 535 SkAutoTDelete<SkThread> nativeThread(new SkThread(TimingThread::Loop, &t
imingThread)); | |
| 536 nativeThread->start(); | |
| 537 sample.fCpu = ascbt.timeSample(loops, &timingThread); | |
| 538 nativeThread->join(); | |
| 539 | |
| 540 // return the min | |
| 541 double min = SK_ScalarMax; | |
| 542 for (int i = 0; i < timingThread.timings().count(); i++) { | |
| 543 min = SkTMin(min, timingThread.timings()[i]); | |
| 544 } | |
| 545 sample.fGpu = min; | |
| 546 } else { | |
| 547 sample.fCpu = ascbt.timeSample(loops, nullptr); | |
| 548 } | |
| 549 | |
| 550 ascbt.teardownBench(); | |
| 551 | |
| 552 return sample; | |
| 553 } | |
| 554 | |
| 555 } // namespace kilobench | |
| 556 | |
| 557 static const int kOutResultSize = 1024; | |
| 558 | |
| 559 void printResult(const SkTArray<double>& samples, int loops, const char* name, c
onst char* mod) { | |
| 560 SkString newName(name); | |
| 561 newName.appendf("_%s", mod); | |
| 562 Stats stats(samples); | |
| 563 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; | |
| 564 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" | |
| 565 , loops | |
| 566 , HUMANIZE(stats.min) | |
| 567 , HUMANIZE(stats.median) | |
| 568 , HUMANIZE(stats.mean) | |
| 569 , HUMANIZE(stats.max) | |
| 570 , stddev_percent | |
| 571 , stats.plot.c_str() | |
| 572 , "gpu" | |
| 573 , newName.c_str() | |
| 574 ); | |
| 575 } | |
| 576 | |
| 577 int kilobench_main() { | |
| 578 kilobench::BenchmarkStream benchStream; | |
| 579 | |
| 580 SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", | |
| 581 FLAGS_samples, "samples"); | |
| 582 | |
| 583 int descriptors[2]; | |
| 584 if (pipe(descriptors) != 0) { | |
| 585 SkFAIL("Failed to open a pipe\n"); | |
| 586 } | |
| 587 | |
| 588 while (Benchmark* b = benchStream.next()) { | |
| 589 SkAutoTDelete<Benchmark> bench(b); | |
| 590 | |
| 591 int loops = 1; | |
| 592 SkTArray<double> cpuSamples; | |
| 593 SkTArray<double> gpuSamples; | |
| 594 for (int i = 0; i < FLAGS_samples + 1; i++) { | |
| 595 // We fork off a new process to setup the grcontext and run the test
while we wait | |
| 596 if (FLAGS_useMultiProcess) { | |
| 597 int childPid = fork(); | |
| 598 if (childPid > 0) { | |
| 599 char result[kOutResultSize]; | |
| 600 if (read(descriptors[0], result, kOutResultSize) < 0) { | |
| 601 SkFAIL("Failed to read from pipe\n"); | |
| 602 } | |
| 603 | |
| 604 // if samples == 0 then parse # of loops | |
| 605 // else parse float | |
| 606 if (i == 0) { | |
| 607 sscanf(result, "%d", &loops); | |
| 608 } else { | |
| 609 sscanf(result, "%lf %lf", &cpuSamples.push_back(), | |
| 610 &gpuSamples.push_back()); | |
| 611 } | |
| 612 | |
| 613 // wait until exit | |
| 614 int status; | |
| 615 waitpid(childPid, &status, 0); | |
| 616 } else if (0 == childPid) { | |
| 617 char result[kOutResultSize]; | |
| 618 if (i == 0) { | |
| 619 sprintf(result, "%d", kilobench::setup_loops(bench)); | |
| 620 } else { | |
| 621 kilobench::Sample sample = kilobench::time_sample(bench,
loops); | |
| 622 sprintf(result, "%lf %lf", sample.fCpu, sample.fGpu); | |
| 623 } | |
| 624 | |
| 625 // Make sure to write the null terminator | |
| 626 if (write(descriptors[1], result, strlen(result) + 1) < 0) { | |
| 627 SkFAIL("Failed to write to pipe\n"); | |
| 628 } | |
| 629 return 0; | |
| 630 } else { | |
| 631 SkFAIL("Fork failed\n"); | |
| 632 } | |
| 633 } else { | |
| 634 if (i == 0) { | |
| 635 loops = kilobench::setup_loops(bench); | |
| 636 } else { | |
| 637 kilobench::Sample sample = kilobench::time_sample(bench, loo
ps); | |
| 638 cpuSamples.push_back(sample.fCpu); | |
| 639 gpuSamples.push_back(sample.fGpu); | |
| 640 } | |
| 641 } | |
| 642 } | |
| 643 | |
| 644 printResult(cpuSamples, loops, bench->getUniqueName(), "cpu"); | |
| 645 if (FLAGS_useBackgroundThread) { | |
| 646 printResult(gpuSamples, loops, bench->getUniqueName(), "gpu"); | |
| 647 } | |
| 648 } | |
| 649 return 0; | |
| 650 } | |
| 651 | |
| 652 #if !defined SK_BUILD_FOR_IOS | |
| 653 int main(int argc, char** argv) { | |
| 654 SkCommandLineFlags::Parse(argc, argv); | |
| 655 return kilobench_main(); | |
| 656 } | |
| 657 #endif | |
| OLD | NEW |