Index: tools/skpbench/skpbench.cpp |
diff --git a/tools/skpbench/skpbench.cpp b/tools/skpbench/skpbench.cpp |
index adb6af0b146669f6a1c44fa253f2ad7a5e7614a6..6d0381a28d75dcd3cfde5658bb737ea75b584930 100644 |
--- a/tools/skpbench/skpbench.cpp |
+++ b/tools/skpbench/skpbench.cpp |
@@ -5,6 +5,7 @@ |
* found in the LICENSE file. |
*/ |
+#include "GpuTimer.h" |
#include "GrContextFactory.h" |
#include "SkCanvas.h" |
#include "SkOSFile.h" |
@@ -33,12 +34,9 @@ |
* Currently, only GPU configs are supported. |
*/ |
-using sk_gpu_test::PlatformFence; |
-using sk_gpu_test::kInvalidPlatformFence; |
-using sk_gpu_test::FenceSync; |
- |
DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); |
DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); |
+DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)"); |
DEFINE_bool(fps, false, "use fps instead of ms"); |
DEFINE_string(skp, "", "path to a single .skp file to benchmark"); |
DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"); |
@@ -46,13 +44,13 @@ DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); |
DEFINE_bool(suppressHeader, false, "don't print a header row before the results"); |
static const char* header = |
- " accum median max min stddev samples sample_ms metric config bench"; |
+" accum median max min stddev samples sample_ms clock metric config bench"; |
static const char* resultFormat = |
- "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-6s %-9s %s"; |
+"%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s"; |
struct Sample { |
- using clock = std::chrono::high_resolution_clock; |
+ using duration = std::chrono::nanoseconds; |
Sample() : fFrames(0), fDuration(0) {} |
double seconds() const { return std::chrono::duration<double>(fDuration).count(); } |
@@ -60,13 +58,13 @@ struct Sample { |
double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; } |
static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } |
- int fFrames; |
- clock::duration fDuration; |
+ int fFrames; |
+ duration fDuration; |
}; |
class GpuSync { |
public: |
- GpuSync(const FenceSync* fenceSync); |
+ GpuSync(const sk_gpu_test::FenceSync* fenceSync); |
~GpuSync(); |
void syncToPreviousFrame(); |
@@ -74,8 +72,8 @@ public: |
private: |
void updateFence(); |
- const FenceSync* const fFenceSync; |
- PlatformFence fFence; |
+ const sk_gpu_test::FenceSync* const fFenceSync; |
+ sk_gpu_test::PlatformFence fFence; |
}; |
enum class ExitErr { |
@@ -92,10 +90,10 @@ static bool mkdir_p(const SkString& name); |
static SkString join(const SkCommandLineFlags::StringArray&); |
static void exitf(ExitErr, const char* format, ...); |
-static void run_benchmark(const FenceSync* fenceSync, SkCanvas* canvas, const SkPicture* skp, |
- std::vector<Sample>* samples) { |
- using clock = Sample::clock; |
- const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); |
+static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas, |
+ const SkPicture* skp, std::vector<Sample>* samples) { |
+ using clock = std::chrono::high_resolution_clock; |
+ const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); |
const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration); |
draw_skp_and_flush(canvas, skp); |
@@ -123,6 +121,66 @@ static void run_benchmark(const FenceSync* fenceSync, SkCanvas* canvas, const Sk |
} while (now < endTime || 0 == samples->size() % 2); |
} |
+static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer, |
+ const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas, |
+ const SkPicture* skp, std::vector<Sample>* samples) { |
+ using sk_gpu_test::PlatformTimerQuery; |
+ using clock = std::chrono::steady_clock; |
+ const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); |
+ const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration); |
+ |
+ if (!gpuTimer->disjointSupport()) { |
+ fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; " |
+ "results may be unreliable\n"); |
+ } |
+ |
+ draw_skp_and_flush(canvas, skp); |
+ GpuSync gpuSync(fenceSync); |
+ |
+ gpuTimer->queueStart(); |
+ draw_skp_and_flush(canvas, skp); |
+ PlatformTimerQuery previousTime = gpuTimer->queueStop(); |
+ gpuSync.syncToPreviousFrame(); |
+ |
+ clock::time_point now = clock::now(); |
+ const clock::time_point endTime = now + benchDuration; |
+ |
+ do { |
+ const clock::time_point sampleEndTime = now + sampleDuration; |
+ samples->emplace_back(); |
+ Sample& sample = samples->back(); |
+ |
+ do { |
+ gpuTimer->queueStart(); |
+ draw_skp_and_flush(canvas, skp); |
+ PlatformTimerQuery time = gpuTimer->queueStop(); |
+ gpuSync.syncToPreviousFrame(); |
+ |
+ switch (gpuTimer->checkQueryStatus(previousTime)) { |
+ using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus; |
+ case QueryStatus::kInvalid: |
+ exitf(ExitErr::kUnavailable, "GPU timer failed"); |
+ case QueryStatus::kPending: |
+ exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync"); |
+ case QueryStatus::kDisjoint: |
+ if (FLAGS_verbosity >= 4) { |
+ fprintf(stderr, "discarding timer query due to disjoint operations.\n"); |
+ } |
+ break; |
+ case QueryStatus::kAccurate: |
+ sample.fDuration += gpuTimer->getTimeElapsed(previousTime); |
+ ++sample.fFrames; |
+ break; |
+ } |
+ gpuTimer->deleteQuery(previousTime); |
+ previousTime = time; |
+ now = clock::now(); |
+ } while (now < sampleEndTime || 0 == sample.fFrames); |
+ } while (now < endTime || 0 == samples->size() % 2); |
+ |
+ gpuTimer->deleteQuery(previousTime); |
+} |
+ |
void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) { |
if (0 == (samples.size() % 2)) { |
exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples"); |
@@ -149,7 +207,8 @@ void print_result(const std::vector<Sample>& samples, const char* config, const |
const double stddev = 100/*%*/ * sqrt(variance) / accumValue; |
printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(), |
- stddev, values.size(), FLAGS_sampleMs, Sample::metric(), config, bench); |
+ stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(), |
+ config, bench); |
printf("\n"); |
fflush(stdout); |
} |
@@ -247,7 +306,15 @@ int main(int argc, char** argv) { |
// Run the benchmark. |
SkCanvas* canvas = surface->getCanvas(); |
canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); |
- run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); |
+ if (!FLAGS_gpuClock) { |
+ run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); |
+ } else { |
+ if (!testCtx->gpuTimingSupport()) { |
+ exitf(ExitErr::kUnavailable, "GPU does not support timing"); |
+ } |
+ run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(), |
+ &samples); |
+ } |
print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).c_str()); |
// Save a proof (if one was requested). |
@@ -300,7 +367,7 @@ static void exitf(ExitErr err, const char* format, ...) { |
exit((int)err); |
} |
-GpuSync::GpuSync(const FenceSync* fenceSync) |
+GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync) |
: fFenceSync(fenceSync) { |
this->updateFence(); |
} |
@@ -310,7 +377,7 @@ GpuSync::~GpuSync() { |
} |
void GpuSync::syncToPreviousFrame() { |
- if (kInvalidPlatformFence == fFence) { |
+ if (sk_gpu_test::kInvalidFence == fFence) { |
exitf(ExitErr::kSoftware, "attempted to sync with invalid fence"); |
} |
if (!fFenceSync->waitFence(fFence)) { |
@@ -322,7 +389,7 @@ void GpuSync::syncToPreviousFrame() { |
void GpuSync::updateFence() { |
fFence = fFenceSync->insertFence(); |
- if (kInvalidPlatformFence == fFence) { |
+ if (sk_gpu_test::kInvalidFence == fFence) { |
exitf(ExitErr::kUnavailable, "failed to insert fence"); |
} |
} |