Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(219)

Side by Side Diff: tools/kilobench/kilobench.cpp

Issue 2018603003: Remove VisualBench and its Android implementation. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/VisualBench/WrappedBenchmark.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "GrCaps.h"
9 #include "GrContextFactory.h"
10 #include "Benchmark.h"
11 #include "ResultsWriter.h"
12 #include "SkCommandLineFlags.h"
13 #include "SkOSFile.h"
14 #include "SkStream.h"
15 #include "SkSurface.h"
16 #include "SkTime.h"
17 #include "SkTLList.h"
18 #include "SkThreadUtils.h"
19 #include "Stats.h"
20 #include "Timer.h"
21 #include "VisualSKPBench.h"
22 #include "gl/GrGLDefines.h"
23 #include "gl/GrGLUtil.h"
24 #include "../private/SkMutex.h"
25 #include "../private/SkSemaphore.h"
26 #include "../private/SkGpuFenceSync.h"
27
28 // posix only for now
29 #include <unistd.h>
30 #include <sys/types.h>
31 #include <sys/wait.h>
32
33 using namespace sk_gpu_test;
34
35 /*
36 * This is an experimental GPU only benchmarking program. The initial implementation will only
37 * support SKPs.
38 */
39
// Sentinel value for --loops: when the requested loop count equals this, the loop count is
// auto-tuned per bench instead of being taken literally.
static const int kAutoTuneLoops = 0;

// Default for --loops: a single loop in debug builds, auto-tuning otherwise.
#ifdef SK_DEBUG
static const int kDefaultLoops = 1;
#else
static const int kDefaultLoops = kAutoTuneLoops;
#endif
48
49 static SkString loops_help_txt() {
50 SkString help;
51 help.printf("Number of times to run each bench. Set this to %d to auto-"
52 "tune for each bench. Timings are only reported when auto-tuning .",
53 kAutoTuneLoops);
54 return help;
55 }
56
57 DEFINE_string(skps, "skps", "Directory to read skps from.");
58 DEFINE_string2(match, m, nullptr,
59 "[~][^]substring[$] [...] of GM name to run.\n"
60 "Multiple matches may be separated by spaces.\n"
61 "~ causes a matching bench to always be skipped\n"
62 "^ requires the start of the bench to match\n"
63 "$ requires the end of the bench to match\n"
64 "^ and $ requires an exact match\n"
65 "If a bench does not match any list entry,\n"
66 "it is skipped unless some list entry starts with ~");
67 DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag.");
68 DEFINE_int32(samples, 10, "Number of samples to measure for each bench.");
69 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this.");
70 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str());
71 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU.");
72 DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs.");
73 DEFINE_bool(useBackgroundThread, true, "If false, kilobench will time cpu / gpu work together");
74 DEFINE_bool(useMultiProcess, true, "If false, kilobench will run all tests in on e process");
75
76 static SkString humanize(double ms) {
77 return HumanizeMs(ms);
78 }
79 #define HUMANIZE(ms) humanize(ms).c_str()
80
81 namespace kilobench {
82 class BenchmarkStream {
83 public:
84 BenchmarkStream() : fCurrentSKP(0) {
85 for (int i = 0; i < FLAGS_skps.count(); i++) {
86 if (SkStrEndsWith(FLAGS_skps[i], ".skp")) {
87 fSKPs.push_back() = FLAGS_skps[i];
88 } else {
89 SkOSFile::Iter it(FLAGS_skps[i], ".skp");
90 SkString path;
91 while (it.next(&path)) {
92 fSKPs.push_back() = SkOSPath::Join(FLAGS_skps[0], path.c_str ());
93 }
94 }
95 }
96 }
97
98 Benchmark* next() {
99 Benchmark* bench = nullptr;
100 // skips non matching benches
101 while ((bench = this->innerNext()) &&
102 (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getUniqueName ()) ||
103 !bench->isSuitableFor(Benchmark::kGPU_Backend))) {
104 delete bench;
105 }
106 return bench;
107 }
108
109 private:
110 static sk_sp<SkPicture> ReadPicture(const char path[]) {
111 // Not strictly necessary, as it will be checked again later,
112 // but helps to avoid a lot of pointless work if we're going to skip it.
113 if (SkCommandLineFlags::ShouldSkip(FLAGS_match, path)) {
114 return nullptr;
115 }
116
117 SkAutoTDelete<SkStream> stream(SkStream::NewFromFile(path));
118 if (stream.get() == nullptr) {
119 SkDebugf("Could not read %s.\n", path);
120 return nullptr;
121 }
122
123 return SkPicture::MakeFromStream(stream.get());
124 }
125
126 Benchmark* innerNext() {
127 // Render skps
128 while (fCurrentSKP < fSKPs.count()) {
129 const SkString& path = fSKPs[fCurrentSKP++];
130 auto pic = ReadPicture(path.c_str());
131 if (!pic) {
132 continue;
133 }
134
135 SkString name = SkOSPath::Basename(path.c_str());
136 return new VisualSKPBench(name.c_str(), pic.get());
137 }
138
139 return nullptr;
140 }
141
142 SkTArray<SkString> fSKPs;
143 int fCurrentSKP;
144 };
145
146 struct GPUTarget {
147 void setup() {
148 fGL->makeCurrent();
149 // Make sure we're done with whatever came before.
150 GR_GL_CALL(fGL->gl(), Finish());
151 }
152
153 SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; }
154
155 void endTiming(bool usePlatformSwapBuffers) {
156 if (fGL) {
157 GR_GL_CALL(fGL->gl(), Flush());
158 if (usePlatformSwapBuffers) {
159 fGL->swapBuffers();
160 } else {
161 fGL->waitOnSyncOrSwap();
162 }
163 }
164 }
165 void finish() {
166 GR_GL_CALL(fGL->gl(), Finish());
167 }
168
169 bool needsFrameTiming(int* maxFrameLag) const {
170 if (!fGL->getMaxGpuFrameLag(maxFrameLag)) {
171 // Frame lag is unknown.
172 *maxFrameLag = FLAGS_gpuFrameLag;
173 }
174 return true;
175 }
176
177 bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText,
178 GrContextFactory::ContextType ctxType,
179 GrContextFactory::ContextOptions ctxOptions, int numSamples) {
180 GrContext* context = factory->get(ctxType, ctxOptions);
181 int maxRTSize = context->caps()->maxRenderTargetSize();
182 SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSi ze),
183 SkTMin(bench->getSize().fY, maxRTSi ze),
184 kN32_SkColorType, kPremul_SkAlphaT ype);
185 uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_ Flag :
186 0;
187 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);
188 fSurface.reset(SkSurface::MakeRenderTarget(context,
189 SkBudgeted::kNo, info,
190 numSamples, &props).release() );
191 fGL = factory->getContextInfo(ctxType, ctxOptions).glContext();
192 if (!fSurface.get()) {
193 return false;
194 }
195
196 // Kilobench should only be used on platforms with fence sync support
197 SkASSERT(fGL->fenceSyncSupport());
198 return true;
199 }
200
201 SkCanvas* getCanvas() const {
202 if (!fSurface.get()) {
203 return nullptr;
204 }
205 return fSurface->getCanvas();
206 }
207
208 bool capturePixels(SkBitmap* bmp) {
209 SkCanvas* canvas = this->getCanvas();
210 if (!canvas) {
211 return false;
212 }
213 bmp->setInfo(canvas->imageInfo());
214 if (!canvas->readPixels(bmp, 0, 0)) {
215 SkDebugf("Can't read canvas pixels.\n");
216 return false;
217 }
218 return true;
219 }
220
221 GLTestContext* gl() { return fGL; }
222
223 private:
224 GLTestContext* fGL;
225 SkAutoTDelete<SkSurface> fSurface;
226 };
227
228 static bool write_canvas_png(GPUTarget* target, const SkString& filename) {
229
230 if (filename.isEmpty()) {
231 return false;
232 }
233 if (target->getCanvas() &&
234 kUnknown_SkColorType == target->getCanvas()->imageInfo().colorType()) {
235 return false;
236 }
237
238 SkBitmap bmp;
239
240 if (!target->capturePixels(&bmp)) {
241 return false;
242 }
243
244 SkString dir = SkOSPath::Dirname(filename.c_str());
245 if (!sk_mkdir(dir.c_str())) {
246 SkDebugf("Can't make dir %s.\n", dir.c_str());
247 return false;
248 }
249 SkFILEWStream stream(filename.c_str());
250 if (!stream.isValid()) {
251 SkDebugf("Can't write %s.\n", filename.c_str());
252 return false;
253 }
254 if (!SkImageEncoder::EncodeStream(&stream, bmp, SkImageEncoder::kPNG_Type, 1 00)) {
255 SkDebugf("Can't encode a PNG.\n");
256 return false;
257 }
258 return true;
259 }
260
261 static int detect_forever_loops(int loops) {
262 // look for a magic run-forever value
263 if (loops < 0) {
264 loops = SK_MaxS32;
265 }
266 return loops;
267 }
268
269 static int clamp_loops(int loops) {
270 if (loops < 1) {
271 SkDebugf("ERROR: clamping loops from %d to 1. "
272 "There's probably something wrong with the bench.\n", loops);
273 return 1;
274 }
275 if (loops > FLAGS_maxLoops) {
276 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loo ps, FLAGS_maxLoops);
277 return FLAGS_maxLoops;
278 }
279 return loops;
280 }
281
282 static double now_ms() { return SkTime::GetNSecs() * 1e-6; }
283
284 struct TimingThread {
285 TimingThread(GLTestContext* mainContext)
286 : fFenceSync(mainContext->fenceSync())
287 , fMainContext(mainContext)
288 , fDone(false) {}
289
290 static void Loop(void* data) {
291 TimingThread* timingThread = reinterpret_cast<TimingThread*>(data);
292 timingThread->timingLoop();
293 }
294
295 // To ensure waiting for the sync actually does something, we check to make sure the we exceed
296 // some small value
297 const double kMinElapsed = 1e-6;
298 bool sanity(double start) const {
299 double elapsed = now_ms() - start;
300 return elapsed > kMinElapsed;
301 }
302
303 void waitFence(SkPlatformGpuFence sync) {
304 SkDEBUGCODE(double start = now_ms());
305 fFenceSync->waitFence(sync);
306 SkASSERT(sanity(start));
307 }
308
309 void timingLoop() {
310 // Create a context which shares display lists with the main thread
311 SkAutoTDelete<GLTestContext> glContext(CreatePlatformGLTestContext(kNone _GrGLStandard,
312 fMain Context));
313 glContext->makeCurrent();
314
315 // Basic timing methodology is:
316 // 1) Wait on semaphore until main thread indicates its time to start ti ming the frame
317 // 2) Wait on frame start sync, record time. This is start of the frame .
318 // 3) Wait on semaphore until main thread indicates its time to finish t iming the frame
319 // 4) Wait on frame end sync, record time. FrameEndTime - FrameStartTim e = frame time
320 // 5) Wait on semaphore until main thread indicates we should time the n ext frame or quit
321 while (true) {
322 fSemaphore.wait();
323
324 // get start sync
325 SkPlatformGpuFence startSync = this->popStartSync();
326
327 // wait on sync
328 this->waitFence(startSync);
329 double start = kilobench::now_ms();
330
331 // do we want to sleep here?
332 // wait for end sync
333 fSemaphore.wait();
334
335 // get end sync
336 SkPlatformGpuFence endSync = this->popEndSync();
337
338 // wait on sync
339 this->waitFence(endSync);
340 double elapsed = kilobench::now_ms() - start;
341
342 // No mutex needed, client won't touch timings until we're done
343 fTimings.push_back(elapsed);
344
345 // clean up fences
346 fFenceSync->deleteFence(startSync);
347 fFenceSync->deleteFence(endSync);
348
349 fSemaphore.wait();
350 if (this->isDone()) {
351 break;
352 }
353 }
354 }
355
356 void pushStartSync() { this->pushSync(&fFrameStartSyncs, &fFrameStartSyncsMu tex); }
357
358 SkPlatformGpuFence popStartSync() {
359 return this->popSync(&fFrameStartSyncs, &fFrameStartSyncsMutex);
360 }
361
362 void pushEndSync() { this->pushSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); }
363
364 SkPlatformGpuFence popEndSync() { return this->popSync(&fFrameEndSyncs, &fFr ameEndSyncsMutex); }
365
366 void setDone() {
367 SkAutoMutexAcquire done(fDoneMutex);
368 fDone = true;
369 fSemaphore.signal();
370 }
371
372 typedef SkTLList<SkPlatformGpuFence, 1> SyncQueue;
373
374 void pushSync(SyncQueue* queue, SkMutex* mutex) {
375 SkAutoMutexAcquire am(mutex);
376 *queue->addToHead() = fFenceSync->insertFence();
377 fSemaphore.signal();
378 }
379
380 SkPlatformGpuFence popSync(SyncQueue* queue, SkMutex* mutex) {
381 SkAutoMutexAcquire am(mutex);
382 SkPlatformGpuFence sync = *queue->head();
383 queue->popHead();
384 return sync;
385 }
386
387 bool isDone() {
388 SkAutoMutexAcquire am1(fFrameStartSyncsMutex);
389 SkAutoMutexAcquire done(fDoneMutex);
390 if (fDone && fFrameStartSyncs.isEmpty()) {
391 return true;
392 } else {
393 return false;
394 }
395 }
396
397 const SkTArray<double>& timings() const { SkASSERT(fDone); return fTimings; }
398
399 private:
400 SkGpuFenceSync* fFenceSync;
401 SkSemaphore fSemaphore;
402 SkMutex fFrameStartSyncsMutex;
403 SyncQueue fFrameStartSyncs;
404 SkMutex fFrameEndSyncsMutex;
405 SyncQueue fFrameEndSyncs;
406 SkTArray<double> fTimings;
407 SkMutex fDoneMutex;
408 GLTestContext* fMainContext;
409 bool fDone;
410 };
411
412 static double time(int loops, Benchmark* bench, GPUTarget* target, TimingThread* timingThread) {
413 SkCanvas* canvas = target->getCanvas();
414 canvas->clear(SK_ColorWHITE);
415 bench->preDraw(canvas);
416
417 if (timingThread) {
418 timingThread->pushStartSync();
419 }
420 double start = now_ms();
421 canvas = target->beginTiming(canvas);
422 bench->draw(loops, canvas);
423 canvas->flush();
424 target->endTiming(timingThread ? true : false);
425
426 double elapsed = now_ms() - start;
427 if (timingThread) {
428 timingThread->pushEndSync();
429 timingThread->setDone();
430 }
431 bench->postDraw(canvas);
432 return elapsed;
433 }
434
435 // TODO For now we don't use the background timing thread to tune loops
436 static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameL ag) {
437 // First, figure out how many loops it'll take to get a frame up to FLAGS_gp uMs.
438 int loops = bench->calculateLoops(FLAGS_loops);
439 if (kAutoTuneLoops == loops) {
440 loops = 1;
441 double elapsed = 0;
442 do {
443 if (1<<30 == loops) {
444 // We're about to wrap. Something's wrong with the bench.
445 loops = 0;
446 break;
447 }
448 loops *= 2;
449 // If the GPU lets frames lag at all, we need to make sure we're tim ing
450 // _this_ round, not still timing last round.
451 for (int i = 0; i < maxGpuFrameLag; i++) {
452 elapsed = time(loops, bench, target, nullptr);
453 }
454 } while (elapsed < FLAGS_gpuMs);
455
456 // We've overshot at least a little. Scale back linearly.
457 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed);
458 loops = clamp_loops(loops);
459
460 // Make sure we're not still timing our calibration.
461 target->finish();
462 } else {
463 loops = detect_forever_loops(loops);
464 }
465
466 // Pretty much the same deal as the calibration: do some warmup to make
467 // sure we're timing steady-state pipelined frames.
468 for (int i = 0; i < maxGpuFrameLag - 1; i++) {
469 time(loops, bench, target, nullptr);
470 }
471
472 return loops;
473 }
474
475 struct AutoSetupContextBenchAndTarget {
476 AutoSetupContextBenchAndTarget(Benchmark* bench) : fBenchmark(bench) {
477 GrContextOptions grContextOpts;
478 fCtxFactory.reset(new GrContextFactory(grContextOpts));
479
480 SkAssertResult(fTarget.init(bench, fCtxFactory, false,
481 GrContextFactory::kNativeGL_ContextType,
482 GrContextFactory::kNone_ContextOptions, 0));
483
484 fCanvas = fTarget.getCanvas();
485 fTarget.setup();
486
487 bench->perCanvasPreDraw(fCanvas);
488 fTarget.needsFrameTiming(&fMaxFrameLag);
489 }
490
491 int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); }
492
493 double timeSample(int loops, TimingThread* timingThread) {
494 for (int i = 0; i < fMaxFrameLag; i++) {
495 time(loops, fBenchmark, &fTarget, timingThread);
496 }
497
498 return time(loops, fBenchmark, &fTarget, timingThread) / loops;
499 }
500
501 void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); }
502
503 SkAutoTDelete<GrContextFactory> fCtxFactory;
504 GPUTarget fTarget;
505 SkCanvas* fCanvas;
506 Benchmark* fBenchmark;
507 int fMaxFrameLag;
508 };
509
510 int setup_loops(Benchmark* bench) {
511 AutoSetupContextBenchAndTarget ascbt(bench);
512 int loops = ascbt.getLoops();
513 ascbt.teardownBench();
514
515 if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) {
516 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu");
517 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName() );
518 pngFilename.append(".png");
519 write_canvas_png(&ascbt.fTarget, pngFilename);
520 }
521 return loops;
522 }
523
524 struct Sample {
525 double fCpu;
526 double fGpu;
527 };
528
529 Sample time_sample(Benchmark* bench, int loops) {
530 AutoSetupContextBenchAndTarget ascbt(bench);
531
532 Sample sample;
533 if (FLAGS_useBackgroundThread) {
534 TimingThread timingThread(ascbt.fTarget.gl());
535 SkAutoTDelete<SkThread> nativeThread(new SkThread(TimingThread::Loop, &t imingThread));
536 nativeThread->start();
537 sample.fCpu = ascbt.timeSample(loops, &timingThread);
538 nativeThread->join();
539
540 // return the min
541 double min = SK_ScalarMax;
542 for (int i = 0; i < timingThread.timings().count(); i++) {
543 min = SkTMin(min, timingThread.timings()[i]);
544 }
545 sample.fGpu = min;
546 } else {
547 sample.fCpu = ascbt.timeSample(loops, nullptr);
548 }
549
550 ascbt.teardownBench();
551
552 return sample;
553 }
554
555 } // namespace kilobench
556
// Size, in bytes, of the buffers used to pass results from the child process to the parent.
static constexpr int kOutResultSize = 1024;
558
559 void printResult(const SkTArray<double>& samples, int loops, const char* name, c onst char* mod) {
560 SkString newName(name);
561 newName.appendf("_%s", mod);
562 Stats stats(samples);
563 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean;
564 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n"
565 , loops
566 , HUMANIZE(stats.min)
567 , HUMANIZE(stats.median)
568 , HUMANIZE(stats.mean)
569 , HUMANIZE(stats.max)
570 , stddev_percent
571 , stats.plot.c_str()
572 , "gpu"
573 , newName.c_str()
574 );
575 }
576
577 int kilobench_main() {
578 kilobench::BenchmarkStream benchStream;
579
580 SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n",
581 FLAGS_samples, "samples");
582
583 int descriptors[2];
584 if (pipe(descriptors) != 0) {
585 SkFAIL("Failed to open a pipe\n");
586 }
587
588 while (Benchmark* b = benchStream.next()) {
589 SkAutoTDelete<Benchmark> bench(b);
590
591 int loops = 1;
592 SkTArray<double> cpuSamples;
593 SkTArray<double> gpuSamples;
594 for (int i = 0; i < FLAGS_samples + 1; i++) {
595 // We fork off a new process to setup the grcontext and run the test while we wait
596 if (FLAGS_useMultiProcess) {
597 int childPid = fork();
598 if (childPid > 0) {
599 char result[kOutResultSize];
600 if (read(descriptors[0], result, kOutResultSize) < 0) {
601 SkFAIL("Failed to read from pipe\n");
602 }
603
604 // if samples == 0 then parse # of loops
605 // else parse float
606 if (i == 0) {
607 sscanf(result, "%d", &loops);
608 } else {
609 sscanf(result, "%lf %lf", &cpuSamples.push_back(),
610 &gpuSamples.push_back());
611 }
612
613 // wait until exit
614 int status;
615 waitpid(childPid, &status, 0);
616 } else if (0 == childPid) {
617 char result[kOutResultSize];
618 if (i == 0) {
619 sprintf(result, "%d", kilobench::setup_loops(bench));
620 } else {
621 kilobench::Sample sample = kilobench::time_sample(bench, loops);
622 sprintf(result, "%lf %lf", sample.fCpu, sample.fGpu);
623 }
624
625 // Make sure to write the null terminator
626 if (write(descriptors[1], result, strlen(result) + 1) < 0) {
627 SkFAIL("Failed to write to pipe\n");
628 }
629 return 0;
630 } else {
631 SkFAIL("Fork failed\n");
632 }
633 } else {
634 if (i == 0) {
635 loops = kilobench::setup_loops(bench);
636 } else {
637 kilobench::Sample sample = kilobench::time_sample(bench, loo ps);
638 cpuSamples.push_back(sample.fCpu);
639 gpuSamples.push_back(sample.fGpu);
640 }
641 }
642 }
643
644 printResult(cpuSamples, loops, bench->getUniqueName(), "cpu");
645 if (FLAGS_useBackgroundThread) {
646 printResult(gpuSamples, loops, bench->getUniqueName(), "gpu");
647 }
648 }
649 return 0;
650 }
651
652 #if !defined SK_BUILD_FOR_IOS
653 int main(int argc, char** argv) {
654 SkCommandLineFlags::Parse(argc, argv);
655 return kilobench_main();
656 }
657 #endif
OLDNEW
« no previous file with comments | « tools/VisualBench/WrappedBenchmark.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698