Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(432)

Side by Side Diff: src/core/SkXfermode.cpp

Issue 1043413002: experimental speedup some xfermodes with Sk4f (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: propose landing disabled first, to reset benchmarks Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « bench/XfermodeBench.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 1
2 /* 2 /*
3 * Copyright 2006 The Android Open Source Project 3 * Copyright 2006 The Android Open Source Project
4 * 4 *
5 * Use of this source code is governed by a BSD-style license that can be 5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file. 6 * found in the LICENSE file.
7 */ 7 */
8 8
9 #include "SkXfermode.h" 9 #include "SkXfermode.h"
10 #include "SkXfermode_opts_SSE2.h" 10 #include "SkXfermode_opts_SSE2.h"
11 #include "SkXfermode_proccoeff.h" 11 #include "SkXfermode_proccoeff.h"
12 #include "SkColorPriv.h" 12 #include "SkColorPriv.h"
13 #include "SkLazyPtr.h" 13 #include "SkLazyPtr.h"
14 #include "SkMathPriv.h" 14 #include "SkMathPriv.h"
15 #include "SkPMFloat.h"
15 #include "SkReadBuffer.h" 16 #include "SkReadBuffer.h"
16 #include "SkString.h" 17 #include "SkString.h"
17 #include "SkUtilsArm.h" 18 #include "SkUtilsArm.h"
18 #include "SkWriteBuffer.h" 19 #include "SkWriteBuffer.h"
19 20
21 #define SK_SUPPORT_LEGACY_SCALAR_XFERMODES
22
20 #if !SK_ARM_NEON_IS_NONE 23 #if !SK_ARM_NEON_IS_NONE
21 #include "SkXfermode_opts_arm_neon.h" 24 #include "SkXfermode_opts_arm_neon.h"
22 #endif 25 #endif
23 26
24 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) 27 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b)
25 28
26 #if 0
27 // idea for higher precision blends in xfer procs (and slightly faster)
28 // see DstATop as a probable caller
29 static U8CPU mulmuldiv255round(U8CPU a, U8CPU b, U8CPU c, U8CPU d) {
30 SkASSERT(a <= 255);
31 SkASSERT(b <= 255);
32 SkASSERT(c <= 255);
33 SkASSERT(d <= 255);
34 unsigned prod = SkMulS16(a, b) + SkMulS16(c, d) + 128;
35 unsigned result = (prod + (prod >> 8)) >> 8;
36 SkASSERT(result <= 255);
37 return result;
38 }
39 #endif
40
41 static inline unsigned saturated_add(unsigned a, unsigned b) { 29 static inline unsigned saturated_add(unsigned a, unsigned b) {
42 SkASSERT(a <= 255); 30 SkASSERT(a <= 255);
43 SkASSERT(b <= 255); 31 SkASSERT(b <= 255);
44 unsigned sum = a + b; 32 unsigned sum = a + b;
45 if (sum > 255) { 33 if (sum > 255) {
46 sum = 255; 34 sum = 255;
47 } 35 }
48 return sum; 36 return sum;
49 } 37 }
50 38
(...skipping 1128 matching lines...) Expand 10 before | Expand all | Expand 10 after
1179 } 1167 }
1180 1168
1181 #ifndef SK_IGNORE_TO_STRING 1169 #ifndef SK_IGNORE_TO_STRING
1182 void SkDstInXfermode::toString(SkString* str) const { 1170 void SkDstInXfermode::toString(SkString* str) const {
1183 this->INHERITED::toString(str); 1171 this->INHERITED::toString(str);
1184 } 1172 }
1185 #endif 1173 #endif
1186 1174
1187 /////////////////////////////////////////////////////////////////////////////// 1175 ///////////////////////////////////////////////////////////////////////////////
1188 1176
1177 /* These modes can merge coverage into src-alpha
1178 *
1179 { dst_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kOne_Coeff },
1180 { srcover_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kISA_Coeff },
1181 { dstover_modeproc, SkXfermode::kIDA_Coeff, SkXfermode::kOne_Coeff },
1182 { dstout_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kISA_Coeff },
1183 { srcatop_modeproc, SkXfermode::kDA_Coeff, SkXfermode::kISA_Coeff },
1184 { xor_modeproc, SkXfermode::kIDA_Coeff, SkXfermode::kISA_Coeff },
1185 { plus_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kOne_Coeff },
1186 { screen_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kISC_Coeff },
1187 */
1188
1189 #ifndef SK_SUPPORT_LEGACY_SCALAR_XFERMODES
1190 static const float gInv255 = 0.0039215683f; // (1.0f / 255) - ULP == SkBits2Float(0x3B808080)
1191
1192 static Sk4f ramp(const Sk4f& v0, const Sk4f& v1, const Sk4f& t) {
1193 return v0 + (v1 - v0) * t;
1194 }
1195
1196 static Sk4f clamp_255(const Sk4f& value) {
1197 return Sk4f::Min(value, Sk4f(255));
1198 }
1199
1200 static inline SkPMFloat check_as_pmfloat(const Sk4f& value) {
1201 SkPMFloat pm = value;
1202 SkASSERT(pm.isValid());
1203 return pm;
1204 }
1205
1206 static inline SkPMFloat enforce_as_pmfloat(const Sk4f& value) {
1207 Sk4f alpha(SkPMFloat(value).a());
1208 Sk4f pinnedAlpha = Sk4f::Min(alpha, Sk4f(255));
1209 SkPMFloat pm = Sk4f::Min(value, pinnedAlpha);
mtklein 2015/04/02 17:37:15 Guh. I take it the places that are using enforce_
1210 SkASSERT(pm.isValid());
1211 return pm;
1212 }
1213
1214 // kSrcATop_Mode, //!< [Da, Sc * Da + (1 - Sa) * Dc]
1215 struct SrcATop4f {
1216 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1217 const Sk4f inv255(gInv255);
1218 Sk4f s4 = src;
1219 Sk4f d4 = dst;
1220 return check_as_pmfloat(d4 + (s4 * Sk4f(dst.a()) - d4 * Sk4f(src.a())) * inv255);
1221 }
1222 static const bool kFoldCoverageIntoSrcAlpha = true;
1223 static const SkXfermode::Mode kMode = SkXfermode::kSrcATop_Mode;
1224 };
1225
1226 // kDstATop_Mode, //!< [Sa, Sa * Dc + Sc * (1 - Da)]
1227 struct DstATop4f {
1228 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1229 const Sk4f inv255(gInv255);
1230 Sk4f s4 = src;
1231 Sk4f d4 = dst;
1232 return check_as_pmfloat(s4 + (d4 * Sk4f(src.a()) - s4 * Sk4f(dst.a())) * inv255);
1233 }
1234 static const bool kFoldCoverageIntoSrcAlpha = false;
1235 static const SkXfermode::Mode kMode = SkXfermode::kDstATop_Mode;
1236 };
1237
1238 // kXor_Mode [Sa + Da - 2 * Sa * Da, Sc * (1 - Da) + (1 - Sa) * Dc]
1239 struct Xor4f {
1240 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1241 const Sk4f inv255(gInv255);
1242 Sk4f s4 = src;
1243 Sk4f d4 = dst;
1244 return enforce_as_pmfloat(s4 + d4 - (s4 * Sk4f(dst.a()) + d4 * Sk4f(src.a())) * inv255);
1245 }
1246 static const bool kFoldCoverageIntoSrcAlpha = true;
1247 static const SkXfermode::Mode kMode = SkXfermode::kXor_Mode;
1248 };
1249
1250 // kPlus_Mode [Sa + Da, Sc + Dc]
1251 struct Plus4f {
1252 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1253 Sk4f s4 = src;
1254 Sk4f d4 = dst;
1255 return check_as_pmfloat(clamp_255(s4 + d4));
1256 }
1257 static const bool kFoldCoverageIntoSrcAlpha = true;
1258 static const SkXfermode::Mode kMode = SkXfermode::kPlus_Mode;
1259 };
1260
1261 // kModulate_Mode [Sa * Da, Sc * Dc]
1262 struct Modulate4f {
1263 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1264 const Sk4f inv255(gInv255);
1265 Sk4f s4 = src;
1266 Sk4f d4 = dst;
1267 return check_as_pmfloat(s4 * d4 * inv255);
1268 }
1269 static const bool kFoldCoverageIntoSrcAlpha = false;
1270 static const SkXfermode::Mode kMode = SkXfermode::kModulate_Mode;
1271 };
1272
1273 // kScreen_Mode [S + D - S * D]
1274 struct Screen4f {
1275 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1276 const Sk4f inv255(gInv255);
1277 Sk4f s4 = src;
1278 Sk4f d4 = dst;
1279 return check_as_pmfloat(enforce_as_pmfloat(s4 + d4 - s4 * d4 * inv255));
1280 }
1281 static const bool kFoldCoverageIntoSrcAlpha = true;
1282 static const SkXfermode::Mode kMode = SkXfermode::kScreen_Mode;
1283 };
1284
1285 template <typename ProcType>
1286 class SkT4fXfermode : public SkProcCoeffXfermode {
1287 public:
1288 static SkXfermode* Create(const ProcCoeff& rec) {
1289 return SkNEW_ARGS(SkT4fXfermode, (rec));
1290 }
1291
1292 void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
1293 if (NULL == aa) {
1294 while (n & 3) {
1295 *dst = ProcType::Xfer(SkPMFloat(*src++), SkPMFloat(*dst)).get();
1296 dst++;
1297 n -= 1;
1298 }
1299 n >>= 2;
1300 for (int i = 0; i < n; ++i) {
1301 SkPMFloat s0, s1, s2, s3;
1302 SkPMFloat::From4PMColors(src, &s0, &s1, &s2, &s3);
1303 SkPMFloat d0, d1, d2, d3;
1304 SkPMFloat::From4PMColors(dst, &d0, &d1, &d2, &d3);
1305 SkPMFloat::To4PMColors(ProcType::Xfer(s0, d0), ProcType::Xfer(s1, d1),
1306 ProcType::Xfer(s2, d2), ProcType::Xfer(s3, d3), dst);
1307 src += 4;
1308 dst += 4;
1309 }
1310 } else {
1311 for (int i = 0; i < n; ++i) {
1312 const Sk4f aa4 = Sk4f(aa[i] * gInv255);
1313 SkPMFloat dstF(dst[i]);
1314 SkPMFloat srcF(src[i]);
1315 Sk4f res;
1316 if (ProcType::kFoldCoverageIntoSrcAlpha) {
1317 Sk4f src4 = srcF;
1318 res = ProcType::Xfer(src4 * aa4, dstF);
1319 } else {
1320 res = ramp(dstF, ProcType::Xfer(srcF, dstF), aa4);
1321 }
1322 dst[i] = SkPMFloat(res).get();
1323 }
1324 }
1325 }
1326
1327 private:
1328 SkT4fXfermode(const ProcCoeff& rec) : SkProcCoeffXfermode(rec, ProcType::kMode) {}
1329
1330 typedef SkProcCoeffXfermode INHERITED;
1331 };
1332 #endif
1333
1334 ///////////////////////////////////////////////////////////////////////////////
1335
1189 class SkDstOutXfermode : public SkProcCoeffXfermode { 1336 class SkDstOutXfermode : public SkProcCoeffXfermode {
1190 public: 1337 public:
1191 static SkDstOutXfermode* Create(const ProcCoeff& rec) { 1338 static SkDstOutXfermode* Create(const ProcCoeff& rec) {
1192 return SkNEW_ARGS(SkDstOutXfermode, (rec)); 1339 return SkNEW_ARGS(SkDstOutXfermode, (rec));
1193 } 1340 }
1194 1341
1195 void xfer32(SkPMColor*, const SkPMColor*, int, const SkAlpha*) const override; 1342 void xfer32(SkPMColor*, const SkPMColor*, int, const SkAlpha*) const override;
1196 1343
1197 SK_TO_STRING_OVERRIDE() 1344 SK_TO_STRING_OVERRIDE()
1198 1345
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1238 SkXfermode* create_mode(int iMode) { 1385 SkXfermode* create_mode(int iMode) {
1239 SkXfermode::Mode mode = (SkXfermode::Mode)iMode; 1386 SkXfermode::Mode mode = (SkXfermode::Mode)iMode;
1240 1387
1241 ProcCoeff rec = gProcCoeffs[mode]; 1388 ProcCoeff rec = gProcCoeffs[mode];
1242 SkXfermodeProc pp = SkPlatformXfermodeProcFactory(mode); 1389 SkXfermodeProc pp = SkPlatformXfermodeProcFactory(mode);
1243 if (pp != NULL) { 1390 if (pp != NULL) {
1244 rec.fProc = pp; 1391 rec.fProc = pp;
1245 } 1392 }
1246 1393
1247 SkXfermode* xfer = NULL; 1394 SkXfermode* xfer = NULL;
1395
1396 #ifndef SK_SUPPORT_LEGACY_SCALAR_XFERMODES
1397 switch (mode) {
1398 case SkXfermode::kSrcATop_Mode:
1399 xfer = SkT4fXfermode<SrcATop4f>::Create(rec);
1400 break;
1401 case SkXfermode::kDstATop_Mode:
1402 xfer = SkT4fXfermode<DstATop4f>::Create(rec);
1403 break;
1404 case SkXfermode::kXor_Mode:
1405 xfer = SkT4fXfermode<Xor4f>::Create(rec);
1406 break;
1407 case SkXfermode::kPlus_Mode:
1408 xfer = SkT4fXfermode<Plus4f>::Create(rec);
1409 break;
1410 case SkXfermode::kModulate_Mode:
1411 xfer = SkT4fXfermode<Modulate4f>::Create(rec);
1412 break;
1413 case SkXfermode::kScreen_Mode:
1414 xfer = SkT4fXfermode<Screen4f>::Create(rec);
1415 break;
1416 default:
1417 break;
1418 }
1419 if (xfer) {
1420 return xfer;
1421 }
1422 #endif
1423
1248 // check if we have a platform optim for that 1424 // check if we have a platform optim for that
1249 SkProcCoeffXfermode* xfm = SkPlatformXfermodeFactory(rec, mode); 1425 SkProcCoeffXfermode* xfm = SkPlatformXfermodeFactory(rec, mode);
1250 if (xfm != NULL) { 1426 if (xfm != NULL) {
1251 xfer = xfm; 1427 xfer = xfm;
1252 } else { 1428 } else {
1253 // All modes can in theory be represented by the ProcCoeff rec, since 1429 // All modes can in theory be represented by the ProcCoeff rec, since
1254 // it contains function ptrs. However, a few modes are both simple and 1430 // it contains function ptrs. However, a few modes are both simple and
1255 // commonly used, so we call those out for their own subclasses here. 1431 // commonly used, so we call those out for their own subclasses here.
1256 switch (mode) { 1432 switch (mode) {
1257 case SkXfermode::kClear_Mode: 1433 case SkXfermode::kClear_Mode:
(...skipping 278 matching lines...) Expand 10 before | Expand all | Expand 10 after
1536 } else { 1712 } else {
1537 proc16 = rec.fProc16_General; 1713 proc16 = rec.fProc16_General;
1538 } 1714 }
1539 } 1715 }
1540 return proc16; 1716 return proc16;
1541 } 1717 }
1542 1718
1543 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_START(SkXfermode) 1719 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_START(SkXfermode)
1544 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkProcCoeffXfermode) 1720 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkProcCoeffXfermode)
1545 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_END 1721 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_END
OLDNEW
« no previous file with comments | « bench/XfermodeBench.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698