tools/kilobench/kilobench.cpp - Issue 1612513002: Add a background timing thread to kilobench

Unified Diff: tools/kilobench/kilobench.cpp

Issue 1612513002: Add a background timing thread to kilobench (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: tweaks Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/kilobench/kilobench.cpp

diff --git a/tools/kilobench/kilobench.cpp b/tools/kilobench/kilobench.cpp

index 8c844f47e0169ac26f72cae33d22b03fb52d8e50..6a9030e49c7e8764e959e89317b29e701ff0f1db 100644

--- a/tools/kilobench/kilobench.cpp

+++ b/tools/kilobench/kilobench.cpp

@@ -14,10 +14,15 @@

#include "SkStream.h"

#include "SkSurface.h"

#include "SkTime.h"

+#include "SkTLList.h"

+#include "SkThreadUtils.h"

#include "Stats.h"

#include "Timer.h"

#include "VisualSKPBench.h"

#include "gl/GrGLDefines.h"

+#include "../private/SkMutex.h"

+#include "../private/SkSemaphore.h"

+#include "../private/SkGpuFenceSync.h"

// posix only for now

#include <unistd.h>

@@ -34,7 +39,6 @@

#include "SkImageDecoder.h"

__SK_FORCE_IMAGE_DECODER_LINKING;

static const int kAutoTuneLoops = 0;

static const int kDefaultLoops =

@@ -68,6 +72,8 @@ DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this.");

DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str());

DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU.");

DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs.");

+DEFINE_bool(useBackgroundThread, true, "If false, kilobench will time cpu / gpu work together");

+DEFINE_bool(useMultiProcess, true, "If false, kilobench will run all tests in one process");

static SkString humanize(double ms) {

return HumanizeMs(ms);

@@ -146,25 +152,25 @@ private:

struct GPUTarget {

void setup() {

- this->gl->makeCurrent();

+ this->fGL->makeCurrent();

bsalomon 2016/01/20 18:54:52 We don't really need all these thises (these?, thi

joshualitt 2016/01/20 21:14:52 Acknowledged.

// Make sure we're done with whatever came before.

- SK_GL(*this->gl, Finish());

+ SK_GL(*this->fGL, Finish());

}

SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; }

- void endTiming() {

- if (this->gl) {

- SK_GL(*this->gl, Flush());

- this->gl->swapBuffers();

+ void endTiming(bool usePlatformSwapBuffers) {

+ if (this->fGL) {

+ SK_GL(*this->fGL, Flush());

+ this->fGL->swapBuffers(usePlatformSwapBuffers);

}

void fence() {

- SK_GL(*this->gl, Finish());

+ SK_GL(*this->fGL, Finish());

}

bool needsFrameTiming(int* maxFrameLag) const {

- if (!this->gl->getMaxGpuFrameLag(maxFrameLag)) {

+ if (!this->fGL->getMaxGpuFrameLag(maxFrameLag)) {

// Frame lag is unknown.

*maxFrameLag = FLAGS_gpuFrameLag;

}

@@ -185,13 +191,13 @@ struct GPUTarget {

this->surface.reset(SkSurface::NewRenderTarget(context,

SkSurface::kNo_Budgeted, info,

numSamples, &props));

- this->gl = factory->getContextInfo(ctxType, ctxOptions).fGLContext;

+ this->fGL = factory->getContextInfo(ctxType, ctxOptions).fGLContext;

if (!this->surface.get()) {

return false;

}

// Kilobench should only be used on platforms with fence sync support

- SkASSERT(this->gl->fenceSyncSupport());

+ SkASSERT(this->fGL->fenceSyncSupport());

return true;

}

@@ -215,9 +221,10 @@ struct GPUTarget {

return true;

}

+ SkGLContext* gl() { return fGL; }

private:

- //const Config config;

- SkGLContext* gl;

+ SkGLContext* fGL;

SkAutoTDelete<SkSurface> surface;

};

@@ -276,24 +283,145 @@ static int clamp_loops(int loops) {

}

static double now_ms() { return SkTime::GetNSecs() * 1e-6; }

-static double time(int loops, Benchmark* bench, GPUTarget* target) {

- SkCanvas* canvas = target->getCanvas();

- if (canvas) {

- canvas->clear(SK_ColorWHITE);

+struct TimingThread {

+ TimingThread(SkGLContext* mainContext)

+ : fFenceSync(mainContext->fenceSync())

+ , fMainContext(mainContext)

+ , fDone(false) {}

+ static void Loop(void* data) {

+ TimingThread* timingThread = reinterpret_cast<TimingThread*>(data);

+ timingThread->timingLoop();

+ }

+ void timingLoop() {

+ // Create a context which shares display lists with the main thread

+ SkAutoTDelete<SkGLContext> glContext(SkCreatePlatformGLContext(kNone_GrGLStandard,

+ fMainContext));

+ glContext->makeCurrent();

+ // Basic timing methodology is:

+ // 1) Wait on semaphore until main thread indicates it's time to start timing the frame

+ // 2) Wait on frame start sync, record time. This is start of the frame.

+ // 3) Wait on semaphore until main thread indicates it's time to finish timing the frame

+ // 4) Wait on frame end sync, record time. FrameEndTime - FrameStartTime = frame time

+ // 5) Wait on semaphore until main thread indicates we should time the next frame or quit

+ while (true) {

+ fSemaphore.wait();

+ // get start sync

+ SkPlatformGpuFence startSync = this->popStartSync();

+ // wait on sync

+ fFenceSync->flushAndWaitFence(startSync);

bsalomon 2016/01/20 18:54:52 Assume in light of other CL, this will not flush?

joshualitt 2016/01/20 21:14:52 Acknowledged.

+ double start = kilobench::now_ms();

+ // do we want to sleep here?

+ // wait for end sync

+ fSemaphore.wait();

+ // get end sync

+ SkPlatformGpuFence endSync = this->popEndSync();

+ // wait on sync

+ fFenceSync->flushAndWaitFence(endSync);

+ double elapsed = kilobench::now_ms() - start;

+ // No mutex needed, client won't touch timings until we're done

+ fTimings.push_back(elapsed);

+ // clean up fences

+ fFenceSync->deleteFence(startSync);

+ fFenceSync->deleteFence(endSync);

+ fSemaphore.wait();

+ if (this->isDone()) {

+ break;

+ }

+ void pushStartSync() { this->pushSync(&fFrameStartSyncs, &fFrameStartSyncsMutex); }

+ SkPlatformGpuFence popStartSync() {

+ return this->popSync(&fFrameStartSyncs, &fFrameStartSyncsMutex);

+ }

+ void pushEndSync() { this->pushSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); }

+ SkPlatformGpuFence popEndSync() { return this->popSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); }

+ void setDone() {

+ SkAutoMutexAcquire done(fDoneMutex);

+ fDone = true;

+ fSemaphore.signal();

+ }

+ typedef SkTLList<SkPlatformGpuFence, 1> SyncQueue;

+ void pushSync(SyncQueue* queue, SkMutex* mutex) {

+ SkAutoMutexAcquire am(mutex);

+ *queue->addToHead() = fFenceSync->insertFence();

+ fSemaphore.signal();

+ }

+ SkPlatformGpuFence popSync(SyncQueue* queue, SkMutex* mutex) {

+ SkAutoMutexAcquire am(mutex);

+ SkPlatformGpuFence sync = *queue->head();

+ queue->popHead();

+ return sync;

+ }

+ bool isDone() {

+ SkAutoMutexAcquire am1(fFrameStartSyncsMutex);

+ SkAutoMutexAcquire done(fDoneMutex);

+ if (fDone && fFrameStartSyncs.isEmpty()) {

+ return true;

+ } else {

+ return false;

+ }

}

+ const SkTArray<double>& timings() const { SkASSERT(fDone); return fTimings; }

+private:

+ SkGpuFenceSync* fFenceSync;

+ SkSemaphore fSemaphore;

+ SkMutex fFrameStartSyncsMutex;

+ SyncQueue fFrameStartSyncs;

+ SkMutex fFrameEndSyncsMutex;

+ SyncQueue fFrameEndSyncs;

+ SkTArray<double> fTimings;

+ SkMutex fDoneMutex;

+ SkGLContext* fMainContext;

+ bool fDone;

+};

+static double time(int loops, Benchmark* bench, GPUTarget* target, TimingThread* timingThread) {

+ SkCanvas* canvas = target->getCanvas();

+ canvas->clear(SK_ColorWHITE);

bench->preDraw(canvas);

+ if (timingThread) {

+ timingThread->pushStartSync();

+ }

double start = now_ms();

canvas = target->beginTiming(canvas);

bench->draw(loops, canvas);

- if (canvas) {

- canvas->flush();

- }

- target->endTiming();

+ canvas->flush();

+ target->endTiming(timingThread ? true : false);

double elapsed = now_ms() - start;

+ if (timingThread) {

+ timingThread->pushEndSync();

+ timingThread->setDone();

+ }

bench->postDraw(canvas);

return elapsed;

}

+// TODO For now we don't use the background timing thread to tune loops

static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameLag) {

// First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs.

int loops = bench->calculateLoops(FLAGS_loops);

@@ -310,7 +438,7 @@ static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameL

// If the GPU lets frames lag at all, we need to make sure we're timing

// _this_ round, not still timing last round.

for (int i = 0; i < maxGpuFrameLag; i++) {

- elapsed = time(loops, bench, target);

+ elapsed = time(loops, bench, target, nullptr);

}

} while (elapsed < FLAGS_gpuMs);

@@ -327,7 +455,7 @@ static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameL

// Pretty much the same deal as the calibration: do some warmup to make

// sure we're timing steady-state pipelined frames.

for (int i = 0; i < maxGpuFrameLag - 1; i++) {

- time(loops, bench, target);

+ time(loops, bench, target, nullptr);

}

return loops;

@@ -351,13 +479,14 @@ struct AutoSetupContextBenchAndTarget {

int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); }

- double timeSample(int loops) {

+ double timeSample(int loops, TimingThread* timingThread) {

for (int i = 0; i < fMaxFrameLag; i++) {

- time(loops, fBenchmark, &fTarget);

+ time(loops, fBenchmark, &fTarget, timingThread);

}

- return time(loops, fBenchmark, &fTarget) / loops;

+ return time(loops, fBenchmark, &fTarget, timingThread) / loops;

}

void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); }

SkAutoTDelete<GrContextFactory> fCtxFactory;

@@ -383,7 +512,23 @@ int setup_loops(Benchmark* bench) {

double time_sample(Benchmark* bench, int loops) {

AutoSetupContextBenchAndTarget ascbt(bench);

- double sample = ascbt.timeSample(loops);

+ double sample;

+ if (FLAGS_useBackgroundThread) {

+ TimingThread timingThread(ascbt.fTarget.gl());

+ SkAutoTDelete<SkThread> nativeThread(new SkThread(TimingThread::Loop, &timingThread));

+ nativeThread->start();

+ sample = ascbt.timeSample(loops, &timingThread);

+ nativeThread->join();

+ // TODO get these times out of here

+ for (int i = 0; i < timingThread.timings().count(); i++) {

+ SkDebugf("gpu times %s\n", HUMANIZE(timingThread.timings()[i]));

+ }

+ } else {

+ sample = ascbt.timeSample(loops, nullptr);

+ }

ascbt.teardownBench();

return sample;

@@ -407,43 +552,51 @@ int kilobench_main() {

while (Benchmark* b = benchStream.next()) {

SkAutoTDelete<Benchmark> bench(b);

- int loops;

+ int loops = 1;

SkTArray<double> samples;

for (int i = 0; i < FLAGS_samples + 1; i++) {

// We fork off a new process to setup the grcontext and run the test while we wait

- int childPid = fork();

- if (childPid > 0) {

- char result[kOutResultSize];

- if (read(descriptors[0], result, kOutResultSize) < 0) {

- SkFAIL("Failed to read from pipe\n");

- }

- // if samples == 0 then parse # of loops

- // else parse float

- if (i == 0) {

- sscanf(result, "%d", &loops);

+ if (FLAGS_useMultiProcess) {

+ int childPid = fork();

+ if (childPid > 0) {

+ char result[kOutResultSize];

+ if (read(descriptors[0], result, kOutResultSize) < 0) {

+ SkFAIL("Failed to read from pipe\n");

+ }

+ // on the first iteration (i == 0) parse the # of loops

+ // otherwise parse a sample time (double)

+ if (i == 0) {

+ sscanf(result, "%d", &loops);

+ } else {

+ sscanf(result, "%lf", &samples.push_back());

+ }

+ // wait until exit

+ int status;

+ waitpid(childPid, &status, 0);

+ } else if (0 == childPid) {

+ char result[kOutResultSize];

+ if (i == 0) {

+ sprintf(result, "%d", kilobench::setup_loops(bench));

+ } else {

+ sprintf(result, "%lf", kilobench::time_sample(bench, loops));

+ }

+ // Make sure to write the null terminator

+ if (write(descriptors[1], result, strlen(result) + 1) < 0) {

+ SkFAIL("Failed to write to pipe\n");

+ }

+ return 0;

} else {

- sscanf(result, "%lf", &samples.push_back());

+ SkFAIL("Fork failed\n");

}

- // wait until exit

- int status;

- waitpid(childPid, &status, 0);

- } else if (0 == childPid) {

- char result[kOutResultSize];

+ } else {

if (i == 0) {

- sprintf(result, "%d", kilobench::setup_loops(bench));

+ loops = kilobench::setup_loops(bench);

} else {

- sprintf(result, "%lf", kilobench::time_sample(bench, loops));

+ samples.push_back() = kilobench::time_sample(bench, loops);

}

- // Make sure to write the null terminator

- if (write(descriptors[1], result, strlen(result) + 1) < 0) {

- SkFAIL("Failed to write to pipe\n");

- }

- return 0;

- } else {

- SkFAIL("Fork failed\n");

}

« include/gpu/gl/SkGLContext.h ('K') | « src/gpu/gl/SkGLContext.cpp ('k') | no next file » | no next file with comments »