Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname in Settings | Help | Chromium Project | Gerrit Changes | Sign out
(123)

Unified Diff: src/codec/SkSwizzler.cpp

Issue 1656543002: NEON for table lookups? (Closed) Base URL: https://skia.googlesource.com/skia.git@index
Patch Set: Inline assembly Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « dm/DM.cpp ('k') | src/opts/SkBlitRow_opts_arm.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: src/codec/SkSwizzler.cpp
diff --git a/src/codec/SkSwizzler.cpp b/src/codec/SkSwizzler.cpp
index 7865184cced94e91a0d22caf9291093ed6211c07..508cc8d71eaf2f9ca02ea8e2265b884a72407626 100644
--- a/src/codec/SkSwizzler.cpp
+++ b/src/codec/SkSwizzler.cpp
@@ -220,7 +220,75 @@ static void swizzle_small_index_to_n32(
static void swizzle_index_to_n32(
void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
+#if defined(SK_ARM_HAS_NEON)
+ uint32_t* dst = (uint32_t*) dstRow;
+ src += offset;
+
+ while (dstWidth >= 16) {
+ // Table registers
+ uint8x16x4_t t0, t1;
+ uint8x8x4_t tr, tg, tb, ta;
+
+ // Indices into table
+ uint8x16_t indices = vld1q_u8(src);
+
+ // Pixel output registers
+ uint8x16x4_t rgba;
+ rgba.val[0] = vdupq_n_u8(0);
+ rgba.val[1] = vdupq_n_u8(0);
+ rgba.val[2] = vdupq_n_u8(0);
+ rgba.val[3] = vdupq_n_u8(0);
+
+ const uint32_t* table = ctable;
+ const int numColors = 256;
+ const int numColorsPerLoop = 32;
+ for (int j = 0; j < numColors / numColorsPerLoop; j++) {
+ // Load a separate color table for each of r, g, b, a
+ t0 = vld4q_u8((const uint8_t*) (table + 0)); // rgba
+ t1 = vld4q_u8((const uint8_t*) (table + 16)); // RGBA
+ SkTSwap(t0.val[1], t1.val[0]); // rRba, gGBA
+ SkTSwap(t0.val[3], t1.val[2]); // rRbB, gGaA
+ tr = *(((uint8x8x4_t*) &t0) + 0); // rR
+ tb = *(((uint8x8x4_t*) &t0) + 1); // bB
+ tg = *(((uint8x8x4_t*) &t1) + 0); // gG
+ ta = *(((uint8x8x4_t*) &t1) + 1); // aA
+
+ // Use VTBL, then OR the results together.
+ rgba.val[0] = vorrq_u8(rgba.val[0],
+ vcombine_u8(vtbl4_u8(tr, *(((uint8x8_t*) &indices) + 0)),
+ vtbl4_u8(tr, *(((uint8x8_t*) &indices) + 1))));
+ rgba.val[1] = vorrq_u8(rgba.val[1],
+ vcombine_u8(vtbl4_u8(tg, *(((uint8x8_t*) &indices) + 0)),
+ vtbl4_u8(tg, *(((uint8x8_t*) &indices) + 1))));
+ rgba.val[2] = vorrq_u8(rgba.val[2],
+ vcombine_u8(vtbl4_u8(tb, *(((uint8x8_t*) &indices) + 0)),
+ vtbl4_u8(tb, *(((uint8x8_t*) &indices) + 1))));
+ rgba.val[3] = vorrq_u8(rgba.val[3],
+ vcombine_u8(vtbl4_u8(ta, *(((uint8x8_t*) &indices) + 0)),
+ vtbl4_u8(ta, *(((uint8x8_t*) &indices) + 1))));
+
+ // Move the next set of indices into the range of the color table. Indices
+ // that are currently in range should go out of range due to underflow.
+ indices = vsubq_u8(indices, vdupq_n_u8(32));
+ table += numColorsPerLoop;
+ }
+
+ // Store output pixel values.
+ vst4q_u8((uint8_t*) dst, rgba);
+ src += 16;
+ dst += 16;
+ dstWidth -= 16;
+ }
+
+ for (int x = 0; x < dstWidth; x++) {
+ SkPMColor c = ctable[*src];
+ dst[x] = c;
+ src += deltaSrc;
+ }
+
+
+#else
src += offset;
SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
for (int x = 0; x < dstWidth; x++) {
@@ -228,6 +296,7 @@ static void swizzle_index_to_n32(
dst[x] = c;
src += deltaSrc;
}
+#endif
}
static void swizzle_index_to_n32_skipZ(
« no previous file with comments | « dm/DM.cpp ('k') | src/opts/SkBlitRow_opts_arm.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698