Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(153)

Side by Side Diff: src/core/SkXfermode.cpp

Issue 1043413002: experimental speedup some xfermodes with Sk4f (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: fix comment Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « bench/XfermodeBench.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 1
2 /* 2 /*
3 * Copyright 2006 The Android Open Source Project 3 * Copyright 2006 The Android Open Source Project
4 * 4 *
5 * Use of this source code is governed by a BSD-style license that can be 5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file. 6 * found in the LICENSE file.
7 */ 7 */
8 8
9 #include "SkXfermode.h" 9 #include "SkXfermode.h"
10 #include "SkXfermode_opts_SSE2.h" 10 #include "SkXfermode_opts_SSE2.h"
11 #include "SkXfermode_proccoeff.h" 11 #include "SkXfermode_proccoeff.h"
12 #include "SkColorPriv.h" 12 #include "SkColorPriv.h"
13 #include "SkLazyPtr.h" 13 #include "SkLazyPtr.h"
14 #include "SkMathPriv.h" 14 #include "SkMathPriv.h"
15 #include "SkPMFloat.h"
15 #include "SkReadBuffer.h" 16 #include "SkReadBuffer.h"
16 #include "SkString.h" 17 #include "SkString.h"
17 #include "SkUtilsArm.h" 18 #include "SkUtilsArm.h"
18 #include "SkWriteBuffer.h" 19 #include "SkWriteBuffer.h"
19 20
21 #define SK_SUPPORT_LEGACY_SCALAR_XFERMODES
22
20 #if !SK_ARM_NEON_IS_NONE 23 #if !SK_ARM_NEON_IS_NONE
21 #include "SkXfermode_opts_arm_neon.h" 24 #include "SkXfermode_opts_arm_neon.h"
22 #endif 25 #endif
23 26
24 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) 27 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b)
25 28
26 #if 0
27 // idea for higher precision blends in xfer procs (and slightly faster)
28 // see DstATop as a probable caller
29 static U8CPU mulmuldiv255round(U8CPU a, U8CPU b, U8CPU c, U8CPU d) {
30 SkASSERT(a <= 255);
31 SkASSERT(b <= 255);
32 SkASSERT(c <= 255);
33 SkASSERT(d <= 255);
34 unsigned prod = SkMulS16(a, b) + SkMulS16(c, d) + 128;
35 unsigned result = (prod + (prod >> 8)) >> 8;
36 SkASSERT(result <= 255);
37 return result;
38 }
39 #endif
40
41 static inline unsigned saturated_add(unsigned a, unsigned b) { 29 static inline unsigned saturated_add(unsigned a, unsigned b) {
42 SkASSERT(a <= 255); 30 SkASSERT(a <= 255);
43 SkASSERT(b <= 255); 31 SkASSERT(b <= 255);
44 unsigned sum = a + b; 32 unsigned sum = a + b;
45 if (sum > 255) { 33 if (sum > 255) {
46 sum = 255; 34 sum = 255;
47 } 35 }
48 return sum; 36 return sum;
49 } 37 }
50 38
(...skipping 1128 matching lines...) Expand 10 before | Expand all | Expand 10 after
1179 } 1167 }
1180 1168
1181 #ifndef SK_IGNORE_TO_STRING 1169 #ifndef SK_IGNORE_TO_STRING
1182 void SkDstInXfermode::toString(SkString* str) const { 1170 void SkDstInXfermode::toString(SkString* str) const {
1183 this->INHERITED::toString(str); 1171 this->INHERITED::toString(str);
1184 } 1172 }
1185 #endif 1173 #endif
1186 1174
1187 /////////////////////////////////////////////////////////////////////////////// 1175 ///////////////////////////////////////////////////////////////////////////////
1188 1176
1177 /* These modes can merge coverage into src-alpha
1178 *
1179 { dst_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kOne_Coeff },
1180 { srcover_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kISA_Coeff },
1181 { dstover_modeproc, SkXfermode::kIDA_Coeff, SkXfermode::kOne_Coeff },
1182 { dstout_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kISA_Coeff },
1183 { srcatop_modeproc, SkXfermode::kDA_Coeff, SkXfermode::kISA_Coeff },
1184 { xor_modeproc, SkXfermode::kIDA_Coeff, SkXfermode::kISA_Coeff },
1185 { plus_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kOne_Coeff },
1186 { screen_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kISC_Coeff },
1187 */
1188
1189 #ifndef SK_SUPPORT_LEGACY_SCALAR_XFERMODES
1190 static const float gInv255 = 0.0039215683f; // (1.0f / 255) - ULP == SkBits2Float(0x3B808080)
1191
1192 static Sk4f ramp(const Sk4f& v0, const Sk4f& v1, const Sk4f& t) {
1193 return v0 + (v1 - v0) * t;
1194 }
1195
1196 static Sk4f clamp_255(const Sk4f& value) {
1197 return Sk4f::Min(value, Sk4f(255));
1198 }
1199
1200 /**
1201 * Some modes can, due to very slight numerical error, generate "invalid" pmcolors...
1202 *
1203 * e.g.
1204 * alpha = 100.9999
1205 * red = 101
1206 *
1207 * or
1208 * alpha = 255.0001
1209 *
1210 * If we know we're going to write-out the values as bytes, we can relax these somewhat,
1211 * since we only really need to enforce that the bytes are valid premul...
1212 *
1213 * To that end, this method asserts that the resulting pmcolor will be valid, but does not call
1214 * SkPMFloat::isValid(), as that would fire sometimes, but not result in a bad pixel.
1215 */
1216 static inline SkPMFloat check_as_pmfloat(const Sk4f& value) {
1217 SkPMFloat pm = value;
1218 #ifdef SK_DEBUG
1219 (void)pm.get();
1220 #endif
1221 return pm;
1222 }
1223
1224 // kSrcATop_Mode, //!< [Da, Sc * Da + (1 - Sa) * Dc]
1225 struct SrcATop4f {
1226 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1227 const Sk4f inv255(gInv255);
1228 Sk4f s4 = src;
1229 Sk4f d4 = dst;
1230 return check_as_pmfloat(d4 + (s4 * Sk4f(dst.a()) - d4 * Sk4f(src.a())) * inv255);
1231 }
1232 static const bool kFoldCoverageIntoSrcAlpha = true;
1233 static const SkXfermode::Mode kMode = SkXfermode::kSrcATop_Mode;
1234 };
1235
1236 // kDstATop_Mode, //!< [Sa, Sa * Dc + Sc * (1 - Da)]
1237 struct DstATop4f {
1238 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1239 const Sk4f inv255(gInv255);
1240 Sk4f s4 = src;
1241 Sk4f d4 = dst;
1242 return check_as_pmfloat(s4 + (d4 * Sk4f(src.a()) - s4 * Sk4f(dst.a())) * inv255);
1243 }
1244 static const bool kFoldCoverageIntoSrcAlpha = false;
1245 static const SkXfermode::Mode kMode = SkXfermode::kDstATop_Mode;
1246 };
1247
1248 // kXor_Mode [Sa + Da - 2 * Sa * Da, Sc * (1 - Da) + (1 - Sa) * Dc]
1249 struct Xor4f {
1250 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1251 const Sk4f inv255(gInv255);
1252 Sk4f s4 = src;
1253 Sk4f d4 = dst;
1254 return check_as_pmfloat(s4 + d4 - (s4 * Sk4f(dst.a()) + d4 * Sk4f(src.a())) * inv255);
1255 }
1256 static const bool kFoldCoverageIntoSrcAlpha = true;
1257 static const SkXfermode::Mode kMode = SkXfermode::kXor_Mode;
1258 };
1259
1260 // kPlus_Mode [Sa + Da, Sc + Dc]
1261 struct Plus4f {
1262 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1263 Sk4f s4 = src;
1264 Sk4f d4 = dst;
1265 return check_as_pmfloat(clamp_255(s4 + d4));
1266 }
1267 static const bool kFoldCoverageIntoSrcAlpha = true;
1268 static const SkXfermode::Mode kMode = SkXfermode::kPlus_Mode;
1269 };
1270
1271 // kModulate_Mode [Sa * Da, Sc * Dc]
1272 struct Modulate4f {
1273 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1274 const Sk4f inv255(gInv255);
1275 Sk4f s4 = src;
1276 Sk4f d4 = dst;
1277 return check_as_pmfloat(s4 * d4 * inv255);
1278 }
1279 static const bool kFoldCoverageIntoSrcAlpha = false;
1280 static const SkXfermode::Mode kMode = SkXfermode::kModulate_Mode;
1281 };
1282
1283 // kScreen_Mode [S + D - S * D]
1284 struct Screen4f {
1285 static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
1286 const Sk4f inv255(gInv255);
1287 Sk4f s4 = src;
1288 Sk4f d4 = dst;
1289 return check_as_pmfloat(check_as_pmfloat(s4 + d4 - s4 * d4 * inv255));
1290 }
1291 static const bool kFoldCoverageIntoSrcAlpha = true;
1292 static const SkXfermode::Mode kMode = SkXfermode::kScreen_Mode;
1293 };
1294
1295 template <typename ProcType>
1296 class SkT4fXfermode : public SkProcCoeffXfermode {
1297 public:
1298 static SkXfermode* Create(const ProcCoeff& rec) {
1299 return SkNEW_ARGS(SkT4fXfermode, (rec));
1300 }
1301
1302 void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
1303 if (NULL == aa) {
1304 while (n & 3) {
1305 *dst = ProcType::Xfer(SkPMFloat(*src++), SkPMFloat(*dst)).get();
1306 dst++;
1307 n -= 1;
1308 }
1309 n >>= 2;
1310 for (int i = 0; i < n; ++i) {
1311 SkPMFloat s0, s1, s2, s3;
1312 SkPMFloat::From4PMColors(src, &s0, &s1, &s2, &s3);
1313 SkPMFloat d0, d1, d2, d3;
1314 SkPMFloat::From4PMColors(dst, &d0, &d1, &d2, &d3);
1315 SkPMFloat::To4PMColors(ProcType::Xfer(s0, d0), ProcType::Xfer(s1, d1),
1316 ProcType::Xfer(s2, d2), ProcType::Xfer(s3, d3), dst);
1317 src += 4;
1318 dst += 4;
1319 }
1320 } else {
1321 for (int i = 0; i < n; ++i) {
1322 const Sk4f aa4 = Sk4f(aa[i] * gInv255);
1323 SkPMFloat dstF(dst[i]);
1324 SkPMFloat srcF(src[i]);
1325 Sk4f res;
1326 if (ProcType::kFoldCoverageIntoSrcAlpha) {
1327 Sk4f src4 = srcF;
1328 res = ProcType::Xfer(src4 * aa4, dstF);
1329 } else {
1330 res = ramp(dstF, ProcType::Xfer(srcF, dstF), aa4);
1331 }
1332 dst[i] = SkPMFloat(res).get();
1333 }
1334 }
1335 }
1336
1337 private:
1338 SkT4fXfermode(const ProcCoeff& rec) : SkProcCoeffXfermode(rec, ProcType::kMode) {}
1339
1340 typedef SkProcCoeffXfermode INHERITED;
1341 };
1342 #endif
1343
1344 ///////////////////////////////////////////////////////////////////////////////
1345
1189 class SkDstOutXfermode : public SkProcCoeffXfermode { 1346 class SkDstOutXfermode : public SkProcCoeffXfermode {
1190 public: 1347 public:
1191 static SkDstOutXfermode* Create(const ProcCoeff& rec) { 1348 static SkDstOutXfermode* Create(const ProcCoeff& rec) {
1192 return SkNEW_ARGS(SkDstOutXfermode, (rec)); 1349 return SkNEW_ARGS(SkDstOutXfermode, (rec));
1193 } 1350 }
1194 1351
1195 void xfer32(SkPMColor*, const SkPMColor*, int, const SkAlpha*) const override; 1352 void xfer32(SkPMColor*, const SkPMColor*, int, const SkAlpha*) const override;
1196 1353
1197 SK_TO_STRING_OVERRIDE() 1354 SK_TO_STRING_OVERRIDE()
1198 1355
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1238 SkXfermode* create_mode(int iMode) { 1395 SkXfermode* create_mode(int iMode) {
1239 SkXfermode::Mode mode = (SkXfermode::Mode)iMode; 1396 SkXfermode::Mode mode = (SkXfermode::Mode)iMode;
1240 1397
1241 ProcCoeff rec = gProcCoeffs[mode]; 1398 ProcCoeff rec = gProcCoeffs[mode];
1242 SkXfermodeProc pp = SkPlatformXfermodeProcFactory(mode); 1399 SkXfermodeProc pp = SkPlatformXfermodeProcFactory(mode);
1243 if (pp != NULL) { 1400 if (pp != NULL) {
1244 rec.fProc = pp; 1401 rec.fProc = pp;
1245 } 1402 }
1246 1403
1247 SkXfermode* xfer = NULL; 1404 SkXfermode* xfer = NULL;
1405
1406 #ifndef SK_SUPPORT_LEGACY_SCALAR_XFERMODES
1407 switch (mode) {
1408 case SkXfermode::kSrcATop_Mode:
1409 xfer = SkT4fXfermode<SrcATop4f>::Create(rec);
1410 break;
1411 case SkXfermode::kDstATop_Mode:
1412 xfer = SkT4fXfermode<DstATop4f>::Create(rec);
1413 break;
1414 case SkXfermode::kXor_Mode:
1415 xfer = SkT4fXfermode<Xor4f>::Create(rec);
1416 break;
1417 case SkXfermode::kPlus_Mode:
1418 xfer = SkT4fXfermode<Plus4f>::Create(rec);
1419 break;
1420 case SkXfermode::kModulate_Mode:
1421 xfer = SkT4fXfermode<Modulate4f>::Create(rec);
1422 break;
1423 case SkXfermode::kScreen_Mode:
1424 xfer = SkT4fXfermode<Screen4f>::Create(rec);
1425 break;
1426 default:
1427 break;
1428 }
1429 if (xfer) {
1430 return xfer;
1431 }
1432 #endif
1433
1248 // check if we have a platform optim for that 1434 // check if we have a platform optim for that
1249 SkProcCoeffXfermode* xfm = SkPlatformXfermodeFactory(rec, mode); 1435 SkProcCoeffXfermode* xfm = SkPlatformXfermodeFactory(rec, mode);
1250 if (xfm != NULL) { 1436 if (xfm != NULL) {
1251 xfer = xfm; 1437 xfer = xfm;
1252 } else { 1438 } else {
1253 // All modes can in theory be represented by the ProcCoeff rec, since 1439 // All modes can in theory be represented by the ProcCoeff rec, since
1254 // it contains function ptrs. However, a few modes are both simple and 1440 // it contains function ptrs. However, a few modes are both simple and
1255 // commonly used, so we call those out for their own subclasses here. 1441 // commonly used, so we call those out for their own subclasses here.
1256 switch (mode) { 1442 switch (mode) {
1257 case SkXfermode::kClear_Mode: 1443 case SkXfermode::kClear_Mode:
(...skipping 278 matching lines...) Expand 10 before | Expand all | Expand 10 after
1536 } else { 1722 } else {
1537 proc16 = rec.fProc16_General; 1723 proc16 = rec.fProc16_General;
1538 } 1724 }
1539 } 1725 }
1540 return proc16; 1726 return proc16;
1541 } 1727 }
1542 1728
1543 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_START(SkXfermode) 1729 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_START(SkXfermode)
1544 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkProcCoeffXfermode) 1730 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkProcCoeffXfermode)
1545 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_END 1731 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_END
OLDNEW
« no previous file with comments | « bench/XfermodeBench.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698