| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include <tuple> | 8 #include <tuple> |
| 9 | 9 |
| 10 #include "Benchmark.h" | 10 #include "Benchmark.h" |
| 11 #include "Resources.h" | 11 #include "Resources.h" |
| 12 #include "SkCpu.h" | 12 #include "SkCpu.h" |
| 13 #include "SkImage.h" | 13 #include "SkImage.h" |
| 14 #include "SkImage_Base.h" | 14 #include "SkImage_Base.h" |
| 15 #include "SkNx.h" | 15 #include "SkNx.h" |
| 16 #include "SkOpts.h" | 16 #include "SkOpts.h" |
| 17 #include "SkPM4fPriv.h" |
| 17 #include "SkString.h" | 18 #include "SkString.h" |
| 18 | 19 |
| 19 #define INNER_LOOPS 10 | 20 #define INNER_LOOPS 10 |
| 20 | 21 |
| 21 namespace sk_default { | 22 static void brute_force_srcover_srgb_srgb( |
| 22 extern void brute_force_srcover_srgb_srgb( | 23 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
| 23 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); | 24 while (ndst > 0) { |
| 25 int n = SkTMin(ndst, nsrc); |
| 26 |
| 27 for (int i = 0; i < n; i++) { |
| 28 srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i]))); |
| 29 } |
| 30 ndst -= n; |
| 31 } |
| 32 } |
| 33 |
| 34 static void best_non_simd_srcover_srgb_srgb( |
| 35 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
| 36 uint64_t* ddst = reinterpret_cast<uint64_t*>(dst); |
| 37 |
| 38 auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) { |
| 39 srcover_srgb8888_srgb_1(dst++, *src++); |
| 40 srcover_srgb8888_srgb_1(dst, *src); |
| 41 }; |
| 42 |
| 43 while (ndst >0) { |
| 44 int count = SkTMin(ndst, nsrc); |
| 45 ndst -= count; |
| 46 const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src); |
| 47 const uint64_t* end = dsrc + (count >> 1); |
| 48 do { |
| 49 if ((~*dsrc & 0xFF000000FF000000) == 0) { |
| 50 do { |
| 51 *ddst++ = *dsrc++; |
| 52 } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0); |
| 53 } else if ((*dsrc & 0xFF000000FF000000) == 0) { |
| 54 do { |
| 55 dsrc++; |
| 56 ddst++; |
| 57 } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0); |
| 58 } else { |
| 59 srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++), |
| 60 reinterpret_cast<const uint32_t*>(dsrc++)); |
| 61 } |
| 62 } while (dsrc < end); |
| 63 |
| 64 if ((count & 1) != 0) { |
| 65 srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst), |
| 66 *reinterpret_cast<const uint32_t*>(dsrc)); |
| 67 } |
| 68 } |
| 69 } |
| 70 |
| 71 static void trivial_srcover_srgb_srgb( |
| 72 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
| 73 while (ndst > 0) { |
| 74 int n = SkTMin(ndst, nsrc); |
| 75 |
| 76 for (int i = 0; i < n; i++) { |
| 77 srcover_srgb8888_srgb_1(dst++, src[i]); |
| 78 } |
| 79 ndst -= n; |
| 80 } |
| 24 } | 81 } |
| 25 | 82 |
| 26 class SrcOverVSkOptsBruteForce { | 83 class SrcOverVSkOptsBruteForce { |
| 27 public: | 84 public: |
| 28 static SkString Name() { return SkString{"VSkOptsBruteForce"}; } | 85 static SkString Name() { return SkString{"VSkOptsBruteForce"}; } |
| 29 static bool WorksOnCpu() { return true; } | 86 static bool WorksOnCpu() { return true; } |
| 30 static void BlendN(uint32_t* dst, int count, const uint32_t* src) { | 87 static void BlendN(uint32_t* dst, const uint32_t* src, int count) { |
| 31 sk_default::brute_force_srcover_srgb_srgb(dst, src, count, count); | 88 brute_force_srcover_srgb_srgb(dst, src, count, count); |
| 32 } | 89 } |
| 33 }; | 90 }; |
| 34 | 91 |
| 35 namespace sk_default { | |
| 36 extern void trivial_srcover_srgb_srgb( | |
| 37 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); | |
| 38 } | |
| 39 | |
| 40 class SrcOverVSkOptsTrivial { | 92 class SrcOverVSkOptsTrivial { |
| 41 public: | 93 public: |
| 42 static SkString Name() { return SkString{"VSkOptsTrivial"}; } | 94 static SkString Name() { return SkString{"VSkOptsTrivial"}; } |
| 43 static bool WorksOnCpu() { return true; } | 95 static bool WorksOnCpu() { return true; } |
| 44 static void BlendN(uint32_t* dst, int count, const uint32_t* src) { | 96 static void BlendN(uint32_t* dst, const uint32_t* src, int count) { |
| 45 sk_default::trivial_srcover_srgb_srgb(dst, src, count, count); | 97 trivial_srcover_srgb_srgb(dst, src, count, count); |
| 46 } | 98 } |
| 47 }; | 99 }; |
| 48 | 100 |
| 49 namespace sk_default { | |
| 50 extern void best_non_simd_srcover_srgb_srgb( | |
| 51 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); | |
| 52 } | |
| 53 | |
| 54 class SrcOverVSkOptsNonSimdCore { | 101 class SrcOverVSkOptsNonSimdCore { |
| 55 public: | 102 public: |
| 56 static SkString Name() { return SkString{"VSkOptsNonSimdCore"}; } | 103 static SkString Name() { return SkString{"VSkOptsNonSimdCore"}; } |
| 57 static bool WorksOnCpu() { return true; } | 104 static bool WorksOnCpu() { return true; } |
| 58 static void BlendN(uint32_t* dst, int count, const uint32_t* src) { | 105 static void BlendN(uint32_t* dst, const uint32_t* src, int count) { |
| 59 sk_default::best_non_simd_srcover_srgb_srgb(dst, src, count, count); | 106 best_non_simd_srcover_srgb_srgb(dst, src, count, count); |
| 60 } | 107 } |
| 61 }; | 108 }; |
| 62 | 109 |
| 63 namespace sk_default { | 110 namespace sk_default { |
| 64 extern void srcover_srgb_srgb( | 111 extern void srcover_srgb_srgb( |
| 65 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); | 112 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); |
| 66 } | 113 } |
| 67 | 114 |
| 68 class SrcOverVSkOptsDefault { | 115 class SrcOverVSkOptsDefault { |
| 69 public: | 116 public: |
| 70 static SkString Name() { return SkString{"VSkOptsDefault"}; } | 117 static SkString Name() { return SkString{"VSkOptsDefault"}; } |
| 71 static bool WorksOnCpu() { return true; } | 118 static bool WorksOnCpu() { return true; } |
| 72 static void BlendN(uint32_t* dst, int count, const uint32_t* src) { | 119 static void BlendN(uint32_t* dst, const uint32_t* src, int count) { |
| 73 sk_default::srcover_srgb_srgb(dst, src, count, count); | 120 sk_default::srcover_srgb_srgb(dst, src, count, count); |
| 74 } | 121 } |
| 75 }; | 122 }; |
| 76 | 123 |
| 77 namespace sk_sse41 { | 124 namespace sk_sse41 { |
| 78 extern void srcover_srgb_srgb( | 125 extern void srcover_srgb_srgb( |
| 79 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); | 126 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); |
| 80 } | 127 } |
| 81 | 128 |
| 82 class SrcOverVSkOptsSSE41 { | 129 class SrcOverVSkOptsSSE41 { |
| 83 public: | 130 public: |
| 84 static SkString Name() { return SkString{"VSkOptsSSE41"}; } | 131 static SkString Name() { return SkString{"VSkOptsSSE41"}; } |
| 85 static bool WorksOnCpu() { return SkCpu::Supports(SkCpu::SSE41); } | 132 static bool WorksOnCpu() { return SkCpu::Supports(SkCpu::SSE41); } |
| 86 static void BlendN(uint32_t* dst, int count, const uint32_t* src) { | 133 static void BlendN(uint32_t* dst, const uint32_t* src, int count) { |
| 87 sk_sse41::srcover_srgb_srgb(dst, src, count, count); | 134 sk_sse41::srcover_srgb_srgb(dst, src, count, count); |
| 88 } | 135 } |
| 89 }; | 136 }; |
| 90 | 137 |
| 91 ////////////////////////////////////////////////////////////////////////////////
/////////////////// | 138 ////////////////////////////////////////////////////////////////////////////////
/////////////////// |
| 92 | 139 |
| 93 template <typename Blender> | 140 template <typename Blender> |
| 94 class LinearSrcOverBench : public Benchmark { | 141 class LinearSrcOverBench : public Benchmark { |
| 95 public: | 142 public: |
| 96 LinearSrcOverBench(const char* fileName) : fFileName(fileName) { | 143 LinearSrcOverBench(const char* fileName) : fFileName(fileName) { |
| 97 fName = "LinearSrcOver"; | 144 fName = "LinearSrcOver_"; |
| 98 fName.append(fileName); | 145 fName.append(fileName); |
| 99 fName.append(Blender::Name()); | 146 fName.append(Blender::Name()); |
| 100 } | 147 } |
| 101 | 148 |
| 102 protected: | 149 protected: |
| 103 bool isSuitableFor(Backend backend) override { | 150 bool isSuitableFor(Backend backend) override { |
| 104 return backend == kNonRendering_Backend && Blender::WorksOnCpu(); | 151 return backend == kNonRendering_Backend && Blender::WorksOnCpu(); |
| 105 } | 152 } |
| 106 const char* onGetName() override { return fName.c_str(); } | 153 const char* onGetName() override { return fName.c_str(); } |
| 107 | 154 |
| 108 void onPreDraw(SkCanvas*) override { | 155 void onPreDraw(SkCanvas*) override { |
| 109 if (!fPixmap.addr()) { | 156 if (!fPixmap.addr()) { |
| 110 sk_sp<SkImage> image = GetResourceAsImage(fFileName.c_str()); | 157 sk_sp<SkImage> image = GetResourceAsImage(fFileName.c_str()); |
| 111 SkBitmap bm; | 158 SkBitmap bm; |
| 112 if (!as_IB(image)->getROPixels(&bm)) { | 159 if (!as_IB(image)->getROPixels(&bm)) { |
| 113 SkFAIL("Could not read resource"); | 160 SkFAIL("Could not read resource"); |
| 114 } | 161 } |
| 115 bm.peekPixels(&fPixmap); | 162 bm.peekPixels(&fPixmap); |
| 116 fCount = fPixmap.rowBytesAsPixels(); | 163 fCount = fPixmap.rowBytesAsPixels(); |
| 117 fDst.reset(fCount); | 164 fDst.reset(fCount); |
| 118 memset(fDst.get(), 0, fPixmap.rowBytes()); | 165 sk_bzero(fDst.get(), fPixmap.rowBytes()); |
| 119 } | 166 } |
| 120 } | 167 } |
| 121 | 168 |
| 122 void onDraw(int loops, SkCanvas*) override { | 169 void onDraw(int loops, SkCanvas*) override { |
| 123 SkASSERT(fPixmap.colorType() == kN32_SkColorType); | 170 SkASSERT(fPixmap.colorType() == kN32_SkColorType); |
| 124 | 171 |
| 125 const int width = fPixmap.rowBytesAsPixels(); | 172 const int width = fPixmap.rowBytesAsPixels(); |
| 126 | 173 |
| 127 for (int i = 0; i < loops * INNER_LOOPS; ++i) { | 174 for (int i = 0; i < loops * INNER_LOOPS; ++i) { |
| 128 const uint32_t* src = fPixmap.addr32(); | 175 const uint32_t* src = fPixmap.addr32(); |
| 129 for (int y = 0; y < fPixmap.height(); y++) { | 176 for (int y = 0; y < fPixmap.height(); y++) { |
| 130 Blender::BlendN(fDst.get(), width, src); | 177 Blender::BlendN(fDst.get(), src, width); |
| 131 src += width; | 178 src += width; |
| 132 } | 179 } |
| 133 } | 180 } |
| 134 } | 181 } |
| 135 | 182 |
| 136 void onPostDraw(SkCanvas*) override { | 183 void onPostDraw(SkCanvas*) override { |
| 137 // Make sure the compiler does not optimize away the operation. | 184 // Make sure the compiler does not optimize away the operation. |
| 138 volatile uint32_t v = 0; | 185 volatile uint32_t v = 0; |
| 139 for (int i = 0; i < fCount; i++) { | 186 for (int i = 0; i < fCount; i++) { |
| 140 v ^= fDst[i]; | 187 v ^= fDst[i]; |
| (...skipping 23 matching lines...) Expand all Loading... |
| 164 DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsTrivial>(fileName); )
\ | 211 DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsTrivial>(fileName); )
\ |
| 165 DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsNonSimdCore>(fileName); )
\ | 212 DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsNonSimdCore>(fileName); )
\ |
| 166 DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsDefault>(fileName); ) | 213 DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsDefault>(fileName); ) |
| 167 #endif | 214 #endif |
| 168 | 215 |
| 169 BENCHES("yellow_rose.png") | 216 BENCHES("yellow_rose.png") |
| 170 BENCHES("baby_tux.png") | 217 BENCHES("baby_tux.png") |
| 171 BENCHES("plane.png") | 218 BENCHES("plane.png") |
| 172 BENCHES("mandrill_512.png") | 219 BENCHES("mandrill_512.png") |
| 173 BENCHES("iconstrip.png") | 220 BENCHES("iconstrip.png") |
| OLD | NEW |