Chromium Code Reviews| Index: tools/kilobench/kilobench.cpp |
| diff --git a/tools/kilobench/kilobench.cpp b/tools/kilobench/kilobench.cpp |
| index 8c844f47e0169ac26f72cae33d22b03fb52d8e50..6a9030e49c7e8764e959e89317b29e701ff0f1db 100644 |
| --- a/tools/kilobench/kilobench.cpp |
| +++ b/tools/kilobench/kilobench.cpp |
| @@ -14,10 +14,15 @@ |
| #include "SkStream.h" |
| #include "SkSurface.h" |
| #include "SkTime.h" |
| +#include "SkTLList.h" |
| +#include "SkThreadUtils.h" |
| #include "Stats.h" |
| #include "Timer.h" |
| #include "VisualSKPBench.h" |
| #include "gl/GrGLDefines.h" |
| +#include "../private/SkMutex.h" |
| +#include "../private/SkSemaphore.h" |
| +#include "../private/SkGpuFenceSync.h" |
| // posix only for now |
| #include <unistd.h> |
| @@ -34,7 +39,6 @@ |
| #include "SkImageDecoder.h" |
| __SK_FORCE_IMAGE_DECODER_LINKING; |
| - |
| static const int kAutoTuneLoops = 0; |
| static const int kDefaultLoops = |
| @@ -68,6 +72,8 @@ DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); |
| DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); |
| DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); |
| DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs."); |
| +DEFINE_bool(useBackgroundThread, true, "If false, kilobench will time cpu / gpu work together"); |
| +DEFINE_bool(useMultiProcess, true, "If false, kilobench will run all tests in one process"); |
| static SkString humanize(double ms) { |
| return HumanizeMs(ms); |
| @@ -146,25 +152,25 @@ private: |
| struct GPUTarget { |
| void setup() { |
| - this->gl->makeCurrent(); |
| + this->fGL->makeCurrent(); |
|
bsalomon
2016/01/20 18:54:52
We don't really need all these thises (these?, thi
joshualitt
2016/01/20 21:14:52
Acknowledged.
|
| // Make sure we're done with whatever came before. |
| - SK_GL(*this->gl, Finish()); |
| + SK_GL(*this->fGL, Finish()); |
| } |
| SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; } |
| - void endTiming() { |
| - if (this->gl) { |
| - SK_GL(*this->gl, Flush()); |
| - this->gl->swapBuffers(); |
| + void endTiming(bool usePlatformSwapBuffers) { |
| + if (this->fGL) { |
| + SK_GL(*this->fGL, Flush()); |
| + this->fGL->swapBuffers(usePlatformSwapBuffers); |
| } |
| } |
| void fence() { |
| - SK_GL(*this->gl, Finish()); |
| + SK_GL(*this->fGL, Finish()); |
| } |
| bool needsFrameTiming(int* maxFrameLag) const { |
| - if (!this->gl->getMaxGpuFrameLag(maxFrameLag)) { |
| + if (!this->fGL->getMaxGpuFrameLag(maxFrameLag)) { |
| // Frame lag is unknown. |
| *maxFrameLag = FLAGS_gpuFrameLag; |
| } |
| @@ -185,13 +191,13 @@ struct GPUTarget { |
| this->surface.reset(SkSurface::NewRenderTarget(context, |
| SkSurface::kNo_Budgeted, info, |
| numSamples, &props)); |
| - this->gl = factory->getContextInfo(ctxType, ctxOptions).fGLContext; |
| + this->fGL = factory->getContextInfo(ctxType, ctxOptions).fGLContext; |
| if (!this->surface.get()) { |
| return false; |
| } |
| // Kilobench should only be used on platforms with fence sync support |
| - SkASSERT(this->gl->fenceSyncSupport()); |
| + SkASSERT(this->fGL->fenceSyncSupport()); |
| return true; |
| } |
| @@ -215,9 +221,10 @@ struct GPUTarget { |
| return true; |
| } |
| + SkGLContext* gl() { return fGL; } |
| + |
| private: |
| - //const Config config; |
| - SkGLContext* gl; |
| + SkGLContext* fGL; |
| SkAutoTDelete<SkSurface> surface; |
| }; |
| @@ -276,24 +283,145 @@ static int clamp_loops(int loops) { |
| } |
| static double now_ms() { return SkTime::GetNSecs() * 1e-6; } |
| -static double time(int loops, Benchmark* bench, GPUTarget* target) { |
| - SkCanvas* canvas = target->getCanvas(); |
| - if (canvas) { |
| - canvas->clear(SK_ColorWHITE); |
| + |
| +struct TimingThread { |
| + TimingThread(SkGLContext* mainContext) |
| + : fFenceSync(mainContext->fenceSync()) |
| + , fMainContext(mainContext) |
| + , fDone(false) {} |
| + |
| + static void Loop(void* data) { |
| + TimingThread* timingThread = reinterpret_cast<TimingThread*>(data); |
| + timingThread->timingLoop(); |
| + } |
| + |
| + void timingLoop() { |
| + // Create a context which shares display lists with the main thread |
| + SkAutoTDelete<SkGLContext> glContext(SkCreatePlatformGLContext(kNone_GrGLStandard, |
| + fMainContext)); |
| + glContext->makeCurrent(); |
| + |
| + // Basic timing methodology is: |
| + // 1) Wait on semaphore until main thread indicates it's time to start timing the frame |
| + // 2) Wait on frame start sync, record time. This is start of the frame. |
| + // 3) Wait on semaphore until main thread indicates it's time to finish timing the frame |
| + // 4) Wait on frame end sync, record time. FrameEndTime - FrameStartTime = frame time |
| + // 5) Wait on semaphore until main thread indicates we should time the next frame or quit |
| + while (true) { |
| + fSemaphore.wait(); |
| + |
| + // get start sync |
| + SkPlatformGpuFence startSync = this->popStartSync(); |
| + |
| + // wait on sync |
| + fFenceSync->flushAndWaitFence(startSync); |
|
bsalomon
2016/01/20 18:54:52
Assume in light of other CL, this will not flush?
joshualitt
2016/01/20 21:14:52
Acknowledged.
|
| + double start = kilobench::now_ms(); |
| + |
| + // do we want to sleep here? |
| + // wait for end sync |
| + fSemaphore.wait(); |
| + |
| + // get end sync |
| + SkPlatformGpuFence endSync = this->popEndSync(); |
| + |
| + // wait on sync |
| + fFenceSync->flushAndWaitFence(endSync); |
| + double elapsed = kilobench::now_ms() - start; |
| + |
| + // No mutex needed, client won't touch timings until we're done |
| + fTimings.push_back(elapsed); |
| + |
| + // clean up fences |
| + fFenceSync->deleteFence(startSync); |
| + fFenceSync->deleteFence(endSync); |
| + |
| + fSemaphore.wait(); |
| + if (this->isDone()) { |
| + break; |
| + } |
| + } |
| + } |
| + |
| + void pushStartSync() { this->pushSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); } |
| + |
| + SkPlatformGpuFence popStartSync() { |
| + return this->popSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); |
| + } |
| + |
| + void pushEndSync() { this->pushSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); } |
| + |
| + SkPlatformGpuFence popEndSync() { return this->popSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); } |
| + |
| + void setDone() { |
| + SkAutoMutexAcquire done(fDoneMutex); |
| + fDone = true; |
| + fSemaphore.signal(); |
| + } |
| + |
| + typedef SkTLList<SkPlatformGpuFence, 1> SyncQueue; |
| + |
| + void pushSync(SyncQueue* queue, SkMutex* mutex) { |
| + SkAutoMutexAcquire am(mutex); |
| + *queue->addToHead() = fFenceSync->insertFence(); |
| + fSemaphore.signal(); |
| + } |
| + |
| + SkPlatformGpuFence popSync(SyncQueue* queue, SkMutex* mutex) { |
| + SkAutoMutexAcquire am(mutex); |
| + SkPlatformGpuFence sync = *queue->head(); |
| + queue->popHead(); |
| + return sync; |
| + } |
| + |
| + bool isDone() { |
| + SkAutoMutexAcquire am1(fFrameStartSyncsMutex); |
| + SkAutoMutexAcquire done(fDoneMutex); |
| + if (fDone && fFrameStartSyncs.isEmpty()) { |
| + return true; |
| + } else { |
| + return false; |
| + } |
| } |
| + |
| + const SkTArray<double>& timings() const { SkASSERT(fDone); return fTimings; } |
| + |
| +private: |
| + SkGpuFenceSync* fFenceSync; |
| + SkSemaphore fSemaphore; |
| + SkMutex fFrameStartSyncsMutex; |
| + SyncQueue fFrameStartSyncs; |
| + SkMutex fFrameEndSyncsMutex; |
| + SyncQueue fFrameEndSyncs; |
| + SkTArray<double> fTimings; |
| + SkMutex fDoneMutex; |
| + SkGLContext* fMainContext; |
| + bool fDone; |
| +}; |
| + |
| +static double time(int loops, Benchmark* bench, GPUTarget* target, TimingThread* timingThread) { |
| + SkCanvas* canvas = target->getCanvas(); |
| + canvas->clear(SK_ColorWHITE); |
| bench->preDraw(canvas); |
| + |
| + if (timingThread) { |
| + timingThread->pushStartSync(); |
| + } |
| double start = now_ms(); |
| canvas = target->beginTiming(canvas); |
| bench->draw(loops, canvas); |
| - if (canvas) { |
| - canvas->flush(); |
| - } |
| - target->endTiming(); |
| + canvas->flush(); |
| + target->endTiming(timingThread ? true : false); |
| + |
| double elapsed = now_ms() - start; |
| + if (timingThread) { |
| + timingThread->pushEndSync(); |
| + timingThread->setDone(); |
| + } |
| bench->postDraw(canvas); |
| return elapsed; |
| } |
| +// TODO For now we don't use the background timing thread to tune loops |
| static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameLag) { |
| // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. |
| int loops = bench->calculateLoops(FLAGS_loops); |
| @@ -310,7 +438,7 @@ static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameL |
| // If the GPU lets frames lag at all, we need to make sure we're timing |
| // _this_ round, not still timing last round. |
| for (int i = 0; i < maxGpuFrameLag; i++) { |
| - elapsed = time(loops, bench, target); |
| + elapsed = time(loops, bench, target, nullptr); |
| } |
| } while (elapsed < FLAGS_gpuMs); |
| @@ -327,7 +455,7 @@ static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameL |
| // Pretty much the same deal as the calibration: do some warmup to make |
| // sure we're timing steady-state pipelined frames. |
| for (int i = 0; i < maxGpuFrameLag - 1; i++) { |
| - time(loops, bench, target); |
| + time(loops, bench, target, nullptr); |
| } |
| return loops; |
| @@ -351,13 +479,14 @@ struct AutoSetupContextBenchAndTarget { |
| int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); } |
| - double timeSample(int loops) { |
| + double timeSample(int loops, TimingThread* timingThread) { |
| for (int i = 0; i < fMaxFrameLag; i++) { |
| - time(loops, fBenchmark, &fTarget); |
| + time(loops, fBenchmark, &fTarget, timingThread); |
| } |
| - return time(loops, fBenchmark, &fTarget) / loops; |
| + return time(loops, fBenchmark, &fTarget, timingThread) / loops; |
| } |
| + |
| void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); } |
| SkAutoTDelete<GrContextFactory> fCtxFactory; |
| @@ -383,7 +512,23 @@ int setup_loops(Benchmark* bench) { |
| double time_sample(Benchmark* bench, int loops) { |
| AutoSetupContextBenchAndTarget ascbt(bench); |
| - double sample = ascbt.timeSample(loops); |
| + |
| + double sample; |
| + if (FLAGS_useBackgroundThread) { |
| + TimingThread timingThread(ascbt.fTarget.gl()); |
| + SkAutoTDelete<SkThread> nativeThread(new SkThread(TimingThread::Loop, &timingThread)); |
| + nativeThread->start(); |
| + sample = ascbt.timeSample(loops, &timingThread); |
| + nativeThread->join(); |
| + |
| + // TODO get these times out of here |
| + for (int i = 0; i < timingThread.timings().count(); i++) { |
| + SkDebugf("gpu times %s\n", HUMANIZE(timingThread.timings()[i])); |
| + } |
| + } else { |
| + sample = ascbt.timeSample(loops, nullptr); |
| + } |
| + |
| ascbt.teardownBench(); |
| return sample; |
| @@ -407,43 +552,51 @@ int kilobench_main() { |
| while (Benchmark* b = benchStream.next()) { |
| SkAutoTDelete<Benchmark> bench(b); |
| - int loops; |
| + int loops = 1; |
| SkTArray<double> samples; |
| for (int i = 0; i < FLAGS_samples + 1; i++) { |
| // We fork off a new process to setup the grcontext and run the test while we wait |
| - int childPid = fork(); |
| - if (childPid > 0) { |
| - char result[kOutResultSize]; |
| - if (read(descriptors[0], result, kOutResultSize) < 0) { |
| - SkFAIL("Failed to read from pipe\n"); |
| - } |
| - |
| - // if samples == 0 then parse # of loops |
| - // else parse float |
| - if (i == 0) { |
| - sscanf(result, "%d", &loops); |
| + if (FLAGS_useMultiProcess) { |
| + int childPid = fork(); |
| + if (childPid > 0) { |
| + char result[kOutResultSize]; |
| + if (read(descriptors[0], result, kOutResultSize) < 0) { |
| + SkFAIL("Failed to read from pipe\n"); |
| + } |
| + |
| + // if samples == 0 then parse # of loops |
| + // else parse float |
| + if (i == 0) { |
| + sscanf(result, "%d", &loops); |
| + } else { |
| + sscanf(result, "%lf", &samples.push_back()); |
| + } |
| + |
| + // wait until exit |
| + int status; |
| + waitpid(childPid, &status, 0); |
| + } else if (0 == childPid) { |
| + char result[kOutResultSize]; |
| + if (i == 0) { |
| + sprintf(result, "%d", kilobench::setup_loops(bench)); |
| + } else { |
| + sprintf(result, "%lf", kilobench::time_sample(bench, loops)); |
| + } |
| + |
| + // Make sure to write the null terminator |
| + if (write(descriptors[1], result, strlen(result) + 1) < 0) { |
| + SkFAIL("Failed to write to pipe\n"); |
| + } |
| + return 0; |
| } else { |
| - sscanf(result, "%lf", &samples.push_back()); |
| + SkFAIL("Fork failed\n"); |
| } |
| - |
| - // wait until exit |
| - int status; |
| - waitpid(childPid, &status, 0); |
| - } else if (0 == childPid) { |
| - char result[kOutResultSize]; |
| + } else { |
| if (i == 0) { |
| - sprintf(result, "%d", kilobench::setup_loops(bench)); |
| + loops = kilobench::setup_loops(bench); |
| } else { |
| - sprintf(result, "%lf", kilobench::time_sample(bench, loops)); |
| + samples.push_back() = kilobench::time_sample(bench, loops); |
| } |
| - |
| - // Make sure to write the null terminator |
| - if (write(descriptors[1], result, strlen(result) + 1) < 0) { |
| - SkFAIL("Failed to write to pipe\n"); |
| - } |
| - return 0; |
| - } else { |
| - SkFAIL("Fork failed\n"); |
| } |
| } |