tools/kilobench/kilobench.cpp - Issue 2018603003: Remove VisualBench and its Android implementation.

Side by Side Diff: tools/kilobench/kilobench.cpp

Issue 2018603003: Remove VisualBench and its Android implementation. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /*

2 * Copyright 2016 Google Inc.

3 *

4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.

6 */

7

8 #include "GrCaps.h"

9 #include "GrContextFactory.h"

10 #include "Benchmark.h"

11 #include "ResultsWriter.h"

12 #include "SkCommandLineFlags.h"

13 #include "SkOSFile.h"

14 #include "SkStream.h"

15 #include "SkSurface.h"

16 #include "SkTime.h"

17 #include "SkTLList.h"

18 #include "SkThreadUtils.h"

19 #include "Stats.h"

20 #include "Timer.h"

21 #include "VisualSKPBench.h"

22 #include "gl/GrGLDefines.h"

23 #include "gl/GrGLUtil.h"

24 #include "../private/SkMutex.h"

25 #include "../private/SkSemaphore.h"

26 #include "../private/SkGpuFenceSync.h"

27

28 // posix only for now

29 #include <unistd.h>

30 #include <sys/types.h>

31 #include <sys/wait.h>

32

33 using namespace sk_gpu_test;

34

35 /*

36 * This is an experimental GPU-only benchmarking program. The initial implementation will only

37 * support SKPs.

38 */

39

// Value of --loops that requests auto-tuning of the loop count for each bench.
static const int kAutoTuneLoops = 0;

// Default for --loops: a single loop when SK_DEBUG is defined, auto-tuning otherwise.
#ifdef SK_DEBUG
static const int kDefaultLoops = 1;
#else
static const int kDefaultLoops = kAutoTuneLoops;
#endif

47 #endif

48

49 static SkString loops_help_txt() {

50 SkString help;

51 help.printf("Number of times to run each bench. Set this to %d to auto-"

52 "tune for each bench. Timings are only reported when auto-tuning .",

53 kAutoTuneLoops);

54 return help;

55 }

56

57 DEFINE_string(skps, "skps", "Directory to read skps from.");

58 DEFINE_string2(match, m, nullptr,

59 "[~][^]substring[$] [...] of GM name to run.\n"

60 "Multiple matches may be separated by spaces.\n"

61 "~ causes a matching bench to always be skipped\n"

62 "^ requires the start of the bench to match\n"

63 "$ requires the end of the bench to match\n"

64 "^ and $ requires an exact match\n"

65 "If a bench does not match any list entry,\n"

66 "it is skipped unless some list entry starts with ~");

67 DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag.");

68 DEFINE_int32(samples, 10, "Number of samples to measure for each bench.");

69 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this.");

70 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str());

71 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU.");

72 DEFINE_string2(writePath, w, "", "If set, write bitmaps here as .pngs.");

73 DEFINE_bool(useBackgroundThread, true, "If false, kilobench will time cpu / gpu work together");

74 DEFINE_bool(useMultiProcess, true, "If false, kilobench will run all tests in on e process");

75

76 static SkString humanize(double ms) {

77 return HumanizeMs(ms);

78 }

79 #define HUMANIZE(ms) humanize(ms).c_str()

80

81 namespace kilobench {

82 class BenchmarkStream {

83 public:

84 BenchmarkStream() : fCurrentSKP(0) {

85 for (int i = 0; i < FLAGS_skps.count(); i++) {

86 if (SkStrEndsWith(FLAGS_skps[i], ".skp")) {

87 fSKPs.push_back() = FLAGS_skps[i];

88 } else {

89 SkOSFile::Iter it(FLAGS_skps[i], ".skp");

90 SkString path;

91 while (it.next(&path)) {

92 fSKPs.push_back() = SkOSPath::Join(FLAGS_skps[0], path.c_str ());

93 }

94 }

95 }

96 }

97

98 Benchmark* next() {

99 Benchmark* bench = nullptr;

100 // skips non matching benches

101 while ((bench = this->innerNext()) &&

102 (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getUniqueName ()) \|\|

103 !bench->isSuitableFor(Benchmark::kGPU_Backend))) {

104 delete bench;

105 }

106 return bench;

107 }

108

109 private:

110 static sk_sp<SkPicture> ReadPicture(const char path[]) {

111 // Not strictly necessary, as it will be checked again later,

112 // but helps to avoid a lot of pointless work if we're going to skip it.

113 if (SkCommandLineFlags::ShouldSkip(FLAGS_match, path)) {

114 return nullptr;

115 }

116

117 SkAutoTDelete<SkStream> stream(SkStream::NewFromFile(path));

118 if (stream.get() == nullptr) {

119 SkDebugf("Could not read %s.\n", path);

120 return nullptr;

121 }

122

123 return SkPicture::MakeFromStream(stream.get());

124 }

125

126 Benchmark* innerNext() {

127 // Render skps

128 while (fCurrentSKP < fSKPs.count()) {

129 const SkString& path = fSKPs[fCurrentSKP++];

130 auto pic = ReadPicture(path.c_str());

131 if (!pic) {

132 continue;

133 }

134

135 SkString name = SkOSPath::Basename(path.c_str());

136 return new VisualSKPBench(name.c_str(), pic.get());

137 }

138

139 return nullptr;

140 }

141

142 SkTArray<SkString> fSKPs;

143 int fCurrentSKP;

144 };

145

146 struct GPUTarget {

147 void setup() {

148 fGL->makeCurrent();

149 // Make sure we're done with whatever came before.

150 GR_GL_CALL(fGL->gl(), Finish());

151 }

152

153 SkCanvas* beginTiming(SkCanvas* canvas) { return canvas; }

154

155 void endTiming(bool usePlatformSwapBuffers) {

156 if (fGL) {

157 GR_GL_CALL(fGL->gl(), Flush());

158 if (usePlatformSwapBuffers) {

159 fGL->swapBuffers();

160 } else {

161 fGL->waitOnSyncOrSwap();

162 }

163 }

164 }

165 void finish() {

166 GR_GL_CALL(fGL->gl(), Finish());

167 }

168

169 bool needsFrameTiming(int* maxFrameLag) const {

170 if (!fGL->getMaxGpuFrameLag(maxFrameLag)) {

171 // Frame lag is unknown.

172 *maxFrameLag = FLAGS_gpuFrameLag;

173 }

174 return true;

175 }

176

177 bool init(Benchmark* bench, GrContextFactory* factory, bool useDfText,

178 GrContextFactory::ContextType ctxType,

179 GrContextFactory::ContextOptions ctxOptions, int numSamples) {

180 GrContext* context = factory->get(ctxType, ctxOptions);

181 int maxRTSize = context->caps()->maxRenderTargetSize();

182 SkImageInfo info = SkImageInfo::Make(SkTMin(bench->getSize().fX, maxRTSi ze),

183 SkTMin(bench->getSize().fY, maxRTSi ze),

184 kN32_SkColorType, kPremul_SkAlphaT ype);

185 uint32_t flags = useDfText ? SkSurfaceProps::kUseDeviceIndependentFonts_ Flag :

186 0;

187 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);

188 fSurface.reset(SkSurface::MakeRenderTarget(context,

189 SkBudgeted::kNo, info,

190 numSamples, &props).release() );

191 fGL = factory->getContextInfo(ctxType, ctxOptions).glContext();

192 if (!fSurface.get()) {

193 return false;

194 }

195

196 // Kilobench should only be used on platforms with fence sync support

197 SkASSERT(fGL->fenceSyncSupport());

198 return true;

199 }

200

201 SkCanvas* getCanvas() const {

202 if (!fSurface.get()) {

203 return nullptr;

204 }

205 return fSurface->getCanvas();

206 }

207

208 bool capturePixels(SkBitmap* bmp) {

209 SkCanvas* canvas = this->getCanvas();

210 if (!canvas) {

211 return false;

212 }

213 bmp->setInfo(canvas->imageInfo());

214 if (!canvas->readPixels(bmp, 0, 0)) {

215 SkDebugf("Can't read canvas pixels.\n");

216 return false;

217 }

218 return true;

219 }

220

221 GLTestContext* gl() { return fGL; }

222

223 private:

224 GLTestContext* fGL;

225 SkAutoTDelete<SkSurface> fSurface;

226 };

227

228 static bool write_canvas_png(GPUTarget* target, const SkString& filename) {

229

230 if (filename.isEmpty()) {

231 return false;

232 }

233 if (target->getCanvas() &&

234 kUnknown_SkColorType == target->getCanvas()->imageInfo().colorType()) {

235 return false;

236 }

237

238 SkBitmap bmp;

239

240 if (!target->capturePixels(&bmp)) {

241 return false;

242 }

243

244 SkString dir = SkOSPath::Dirname(filename.c_str());

245 if (!sk_mkdir(dir.c_str())) {

246 SkDebugf("Can't make dir %s.\n", dir.c_str());

247 return false;

248 }

249 SkFILEWStream stream(filename.c_str());

250 if (!stream.isValid()) {

251 SkDebugf("Can't write %s.\n", filename.c_str());

252 return false;

253 }

254 if (!SkImageEncoder::EncodeStream(&stream, bmp, SkImageEncoder::kPNG_Type, 1 00)) {

255 SkDebugf("Can't encode a PNG.\n");

256 return false;

257 }

258 return true;

259 }

260

261 static int detect_forever_loops(int loops) {

262 // look for a magic run-forever value

263 if (loops < 0) {

264 loops = SK_MaxS32;

265 }

266 return loops;

267 }

268

269 static int clamp_loops(int loops) {

270 if (loops < 1) {

271 SkDebugf("ERROR: clamping loops from %d to 1. "

272 "There's probably something wrong with the bench.\n", loops);

273 return 1;

274 }

275 if (loops > FLAGS_maxLoops) {

276 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loo ps, FLAGS_maxLoops);

277 return FLAGS_maxLoops;

278 }

279 return loops;

280 }

281

282 static double now_ms() { return SkTime::GetNSecs() * 1e-6; }

283

284 struct TimingThread {

285 TimingThread(GLTestContext* mainContext)

286 : fFenceSync(mainContext->fenceSync())

287 , fMainContext(mainContext)

288 , fDone(false) {}

289

290 static void Loop(void* data) {

291 TimingThread* timingThread = reinterpret_cast<TimingThread*>(data);

292 timingThread->timingLoop();

293 }

294

295 // To ensure waiting for the sync actually does something, we check to make sure the we exceed

296 // some small value

297 const double kMinElapsed = 1e-6;

298 bool sanity(double start) const {

299 double elapsed = now_ms() - start;

300 return elapsed > kMinElapsed;

301 }

302

303 void waitFence(SkPlatformGpuFence sync) {

304 SkDEBUGCODE(double start = now_ms());

305 fFenceSync->waitFence(sync);

306 SkASSERT(sanity(start));

307 }

308

309 void timingLoop() {

310 // Create a context which shares display lists with the main thread

311 SkAutoTDelete<GLTestContext> glContext(CreatePlatformGLTestContext(kNone _GrGLStandard,

312 fMain Context));

313 glContext->makeCurrent();

314

315 // Basic timing methodology is:

316 // 1) Wait on semaphore until main thread indicates its time to start ti ming the frame

317 // 2) Wait on frame start sync, record time. This is start of the frame .

318 // 3) Wait on semaphore until main thread indicates its time to finish t iming the frame

319 // 4) Wait on frame end sync, record time. FrameEndTime - FrameStartTim e = frame time

320 // 5) Wait on semaphore until main thread indicates we should time the n ext frame or quit

321 while (true) {

322 fSemaphore.wait();

323

324 // get start sync

325 SkPlatformGpuFence startSync = this->popStartSync();

326

327 // wait on sync

328 this->waitFence(startSync);

329 double start = kilobench::now_ms();

330

331 // do we want to sleep here?

332 // wait for end sync

333 fSemaphore.wait();

334

335 // get end sync

336 SkPlatformGpuFence endSync = this->popEndSync();

337

338 // wait on sync

339 this->waitFence(endSync);

340 double elapsed = kilobench::now_ms() - start;

341

342 // No mutex needed, client won't touch timings until we're done

343 fTimings.push_back(elapsed);

344

345 // clean up fences

346 fFenceSync->deleteFence(startSync);

347 fFenceSync->deleteFence(endSync);

348

349 fSemaphore.wait();

350 if (this->isDone()) {

351 break;

352 }

353 }

354 }

355

356 void pushStartSync() { this->pushSync(&fFrameStartSyncs, &fFrameStartSyncsMu tex); }

357

358 SkPlatformGpuFence popStartSync() {

359 return this->popSync(&fFrameStartSyncs, &fFrameStartSyncsMutex);

360 }

361

362 void pushEndSync() { this->pushSync(&fFrameEndSyncs, &fFrameEndSyncsMutex); }

363

364 SkPlatformGpuFence popEndSync() { return this->popSync(&fFrameEndSyncs, &fFr ameEndSyncsMutex); }

365

366 void setDone() {

367 SkAutoMutexAcquire done(fDoneMutex);

368 fDone = true;

369 fSemaphore.signal();

370 }

371

372 typedef SkTLList<SkPlatformGpuFence, 1> SyncQueue;

373

374 void pushSync(SyncQueue* queue, SkMutex* mutex) {

375 SkAutoMutexAcquire am(mutex);

376 *queue->addToHead() = fFenceSync->insertFence();

377 fSemaphore.signal();

378 }

379

380 SkPlatformGpuFence popSync(SyncQueue* queue, SkMutex* mutex) {

381 SkAutoMutexAcquire am(mutex);

382 SkPlatformGpuFence sync = *queue->head();

383 queue->popHead();

384 return sync;

385 }

386

387 bool isDone() {

388 SkAutoMutexAcquire am1(fFrameStartSyncsMutex);

389 SkAutoMutexAcquire done(fDoneMutex);

390 if (fDone && fFrameStartSyncs.isEmpty()) {

391 return true;

392 } else {

393 return false;

394 }

395 }

396

397 const SkTArray<double>& timings() const { SkASSERT(fDone); return fTimings; }

398

399 private:

400 SkGpuFenceSync* fFenceSync;

401 SkSemaphore fSemaphore;

402 SkMutex fFrameStartSyncsMutex;

403 SyncQueue fFrameStartSyncs;

404 SkMutex fFrameEndSyncsMutex;

405 SyncQueue fFrameEndSyncs;

406 SkTArray<double> fTimings;

407 SkMutex fDoneMutex;

408 GLTestContext* fMainContext;

409 bool fDone;

410 };

411

412 static double time(int loops, Benchmark* bench, GPUTarget* target, TimingThread* timingThread) {

413 SkCanvas* canvas = target->getCanvas();

414 canvas->clear(SK_ColorWHITE);

415 bench->preDraw(canvas);

416

417 if (timingThread) {

418 timingThread->pushStartSync();

419 }

420 double start = now_ms();

421 canvas = target->beginTiming(canvas);

422 bench->draw(loops, canvas);

423 canvas->flush();

424 target->endTiming(timingThread ? true : false);

425

426 double elapsed = now_ms() - start;

427 if (timingThread) {

428 timingThread->pushEndSync();

429 timingThread->setDone();

430 }

431 bench->postDraw(canvas);

432 return elapsed;

433 }

434

435 // TODO For now we don't use the background timing thread to tune loops

436 static int setup_gpu_bench(GPUTarget* target, Benchmark* bench, int maxGpuFrameL ag) {

437 // First, figure out how many loops it'll take to get a frame up to FLAGS_gp uMs.

438 int loops = bench->calculateLoops(FLAGS_loops);

439 if (kAutoTuneLoops == loops) {

440 loops = 1;

441 double elapsed = 0;

442 do {

443 if (1<<30 == loops) {

444 // We're about to wrap. Something's wrong with the bench.

445 loops = 0;

446 break;

447 }

448 loops *= 2;

449 // If the GPU lets frames lag at all, we need to make sure we're tim ing

450 // _this_ round, not still timing last round.

451 for (int i = 0; i < maxGpuFrameLag; i++) {

452 elapsed = time(loops, bench, target, nullptr);

453 }

454 } while (elapsed < FLAGS_gpuMs);

455

456 // We've overshot at least a little. Scale back linearly.

457 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed);

458 loops = clamp_loops(loops);

459

460 // Make sure we're not still timing our calibration.

461 target->finish();

462 } else {

463 loops = detect_forever_loops(loops);

464 }

465

466 // Pretty much the same deal as the calibration: do some warmup to make

467 // sure we're timing steady-state pipelined frames.

468 for (int i = 0; i < maxGpuFrameLag - 1; i++) {

469 time(loops, bench, target, nullptr);

470 }

471

472 return loops;

473 }

474

475 struct AutoSetupContextBenchAndTarget {

476 AutoSetupContextBenchAndTarget(Benchmark* bench) : fBenchmark(bench) {

477 GrContextOptions grContextOpts;

478 fCtxFactory.reset(new GrContextFactory(grContextOpts));

479

480 SkAssertResult(fTarget.init(bench, fCtxFactory, false,

481 GrContextFactory::kNativeGL_ContextType,

482 GrContextFactory::kNone_ContextOptions, 0));

483

484 fCanvas = fTarget.getCanvas();

485 fTarget.setup();

486

487 bench->perCanvasPreDraw(fCanvas);

488 fTarget.needsFrameTiming(&fMaxFrameLag);

489 }

490

491 int getLoops() { return setup_gpu_bench(&fTarget, fBenchmark, fMaxFrameLag); }

492

493 double timeSample(int loops, TimingThread* timingThread) {

494 for (int i = 0; i < fMaxFrameLag; i++) {

495 time(loops, fBenchmark, &fTarget, timingThread);

496 }

497

498 return time(loops, fBenchmark, &fTarget, timingThread) / loops;

499 }

500

501 void teardownBench() { fBenchmark->perCanvasPostDraw(fCanvas); }

502

503 SkAutoTDelete<GrContextFactory> fCtxFactory;

504 GPUTarget fTarget;

505 SkCanvas* fCanvas;

506 Benchmark* fBenchmark;

507 int fMaxFrameLag;

508 };

509

510 int setup_loops(Benchmark* bench) {

511 AutoSetupContextBenchAndTarget ascbt(bench);

512 int loops = ascbt.getLoops();

513 ascbt.teardownBench();

514

515 if (!FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) {

516 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], "gpu");

517 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName() );

518 pngFilename.append(".png");

519 write_canvas_png(&ascbt.fTarget, pngFilename);

520 }

521 return loops;

522 }

523

524 struct Sample {

525 double fCpu;

526 double fGpu;

527 };

528

529 Sample time_sample(Benchmark* bench, int loops) {

530 AutoSetupContextBenchAndTarget ascbt(bench);

531

532 Sample sample;

533 if (FLAGS_useBackgroundThread) {

534 TimingThread timingThread(ascbt.fTarget.gl());

535 SkAutoTDelete<SkThread> nativeThread(new SkThread(TimingThread::Loop, &t imingThread));

536 nativeThread->start();

537 sample.fCpu = ascbt.timeSample(loops, &timingThread);

538 nativeThread->join();

539

540 // return the min

541 double min = SK_ScalarMax;

542 for (int i = 0; i < timingThread.timings().count(); i++) {

543 min = SkTMin(min, timingThread.timings()[i]);

544 }

545 sample.fGpu = min;

546 } else {

547 sample.fCpu = ascbt.timeSample(loops, nullptr);

548 }

549

550 ascbt.teardownBench();

551

552 return sample;

553 }

554

555 } // namespace kilobench

556

557 static const int kOutResultSize = 1024;

558

559 void printResult(const SkTArray<double>& samples, int loops, const char* name, c onst char* mod) {

560 SkString newName(name);

561 newName.appendf("_%s", mod);

562 Stats stats(samples);

563 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean;

564 SkDebugf("%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n"

565 , loops

566 , HUMANIZE(stats.min)

567 , HUMANIZE(stats.median)

568 , HUMANIZE(stats.mean)

569 , HUMANIZE(stats.max)

570 , stddev_percent

571 , stats.plot.c_str()

572 , "gpu"

573 , newName.c_str()

574 );

575 }

576

577 int kilobench_main() {

578 kilobench::BenchmarkStream benchStream;

579

580 SkDebugf("loops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n",

581 FLAGS_samples, "samples");

582

583 int descriptors[2];

584 if (pipe(descriptors) != 0) {

585 SkFAIL("Failed to open a pipe\n");

586 }

587

588 while (Benchmark* b = benchStream.next()) {

589 SkAutoTDelete<Benchmark> bench(b);

590

591 int loops = 1;

592 SkTArray<double> cpuSamples;

593 SkTArray<double> gpuSamples;

594 for (int i = 0; i < FLAGS_samples + 1; i++) {

595 // We fork off a new process to setup the grcontext and run the test while we wait

596 if (FLAGS_useMultiProcess) {

597 int childPid = fork();

598 if (childPid > 0) {

599 char result[kOutResultSize];

600 if (read(descriptors[0], result, kOutResultSize) < 0) {

601 SkFAIL("Failed to read from pipe\n");

602 }

603

604 // if samples == 0 then parse # of loops

605 // else parse float

606 if (i == 0) {

607 sscanf(result, "%d", &loops);

608 } else {

609 sscanf(result, "%lf %lf", &cpuSamples.push_back(),

610 &gpuSamples.push_back());

611 }

612

613 // wait until exit

614 int status;

615 waitpid(childPid, &status, 0);

616 } else if (0 == childPid) {

617 char result[kOutResultSize];

618 if (i == 0) {

619 sprintf(result, "%d", kilobench::setup_loops(bench));

620 } else {

621 kilobench::Sample sample = kilobench::time_sample(bench, loops);

622 sprintf(result, "%lf %lf", sample.fCpu, sample.fGpu);

623 }

624

625 // Make sure to write the null terminator

626 if (write(descriptors[1], result, strlen(result) + 1) < 0) {

627 SkFAIL("Failed to write to pipe\n");

628 }

629 return 0;

630 } else {

631 SkFAIL("Fork failed\n");

632 }

633 } else {

634 if (i == 0) {

635 loops = kilobench::setup_loops(bench);

636 } else {

637 kilobench::Sample sample = kilobench::time_sample(bench, loo ps);

638 cpuSamples.push_back(sample.fCpu);

639 gpuSamples.push_back(sample.fGpu);

640 }

641 }

642 }

643

644 printResult(cpuSamples, loops, bench->getUniqueName(), "cpu");

645 if (FLAGS_useBackgroundThread) {

646 printResult(gpuSamples, loops, bench->getUniqueName(), "gpu");

647 }

648 }

649 return 0;

650 }

651

652 #if !defined SK_BUILD_FOR_IOS

653 int main(int argc, char** argv) {

654 SkCommandLineFlags::Parse(argc, argv);

655 return kilobench_main();

656 }

657 #endif

OLD	NEW

« no previous file with comments | « tools/VisualBench/WrappedBenchmark.h ('k') | no next file » | no next file with comments »