Index: src/opts/SkSwizzler_opts.h |
diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h |
index b0cf4cad5324eae5eb64fcce75eb755d253a02d5..8e1de35ecbc44dc4f4f2037d63fa6064b70f713e 100644 |
--- a/src/opts/SkSwizzler_opts.h |
+++ b/src/opts/SkSwizzler_opts.h |
@@ -60,6 +60,34 @@ static void swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count |
} |
} |
// Expands |count| packed 3-byte pixels into 4-byte pixels with an opaque
// fourth byte.
//
// Although |src| is declared as uint32_t[], it is read as a byte stream:
// three consecutive bytes per pixel, 3 * count bytes in total.  The byte order
// is kept in the output word (first byte -> bits 0-7, second -> bits 8-15,
// third -> bits 16-23) and 0xFF is written to bits 24-31.
//
// Review note (msarett, 2016/01/21 22:09:07), truncated in the review page:
//   "It's a little strange that we pass in src as a poi"
static void xxx_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    // Reinterpret the source as bytes without dropping its const qualifier.
    const uint8_t* src8 = (const uint8_t*) src;
    for (int i = 0; i < count; i++) {
        // Step the read pointer instead of keeping a separate int byte index,
        // which would overflow once 3 * count no longer fits in an int.
        uint8_t b = src8[0],
                g = src8[1],
                r = src8[2];
        src8 += 3;
        dst[i] = (uint32_t) b << 0
               | (uint32_t) g << 8
               | (uint32_t) r << 16
               | (uint32_t) 0xFF << 24;
    }
}
+ |
// Expands |count| packed 3-byte pixels into 4-byte pixels with an opaque
// fourth byte, exchanging the first and third byte of every pixel.
//
// |src| is declared as uint32_t[] but is read as a byte stream: three
// consecutive bytes per pixel, 3 * count bytes in total.  In the output word
// the third source byte lands in bits 0-7, the second in bits 8-15, the first
// in bits 16-23, and 0xFF is written to bits 24-31.
static void xxx_swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    // Reinterpret the source as bytes without dropping its const qualifier.
    const uint8_t* src8 = (const uint8_t*) src;
    for (int i = 0; i < count; i++) {
        // Step the read pointer instead of keeping a separate int byte index,
        // which would overflow once 3 * count no longer fits in an int.
        uint8_t b = src8[0],
                g = src8[1],
                r = src8[2];
        src8 += 3;
        dst[i] = (uint32_t) r << 0
               | (uint32_t) g << 8
               | (uint32_t) b << 16
               | (uint32_t) 0xFF << 24;
    }
}
+ |
#if defined(SK_ARM_HAS_NEON) |
// Rounded divide by 255, (x + 127) / 255 |
@@ -168,6 +196,88 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
swaprb_xxxa_portable(dst, src, count); |
} |
+static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
+ const uint8_t* src8 = (const uint8_t*) src; |
+ while (count >= 16) { |
+ // Load 16 pixels. |
+ uint8x16x3_t bgr = vld3q_u8(src8); |
+ |
+ // Insert an opaque alpha channel. |
+ uint8x16x4_t bgra; |
+ bgra.val[0] = bgr.val[0]; |
+ bgra.val[1] = bgr.val[1]; |
+ bgra.val[2] = bgr.val[2]; |
+ bgra.val[3] = vdupq_n_u8(0xFF); |
+ |
+ // Store 16 pixels. |
+ vst4q_u8((uint8_t*) dst, bgra); |
+ src8 += 48; |
+ dst += 16; |
+ count -= 16; |
+ } |
+ |
+ if (count >= 8) { |
+ // Load 8 pixels. |
+ uint8x8x3_t bgr = vld3_u8(src8); |
+ |
+ // Insert an opaque alpha channel. |
+ uint8x8x4_t bgra; |
+ bgra.val[0] = bgr.val[0]; |
+ bgra.val[1] = bgr.val[1]; |
+ bgra.val[2] = bgr.val[2]; |
+ bgra.val[3] = vdup_n_u8(0xFF); |
+ |
+ // Store 8 pixels. |
+ vst4_u8((uint8_t*) dst, bgra); |
+ src8 += 24; |
+ dst += 8; |
+ count -= 8; |
+ } |
+ |
+ xxx_xxxa_portable(dst, src, count); |
+} |
+ |
+static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
+ const uint8_t* src8 = (const uint8_t*) src; |
+ while (count >= 16) { |
+ // Load 16 pixels. |
+ uint8x16x3_t bgr = vld3q_u8(src8); |
+ |
+ // Swap r and b and insert an opaque alpha channel. |
+ uint8x16x4_t rgba; |
+ rgba.val[0] = bgr.val[2]; |
+ rgba.val[1] = bgr.val[1]; |
+ rgba.val[2] = bgr.val[0]; |
+ rgba.val[3] = vdupq_n_u8(0xFF); |
+ |
+ // Store 16 pixels. |
+ vst4q_u8((uint8_t*) dst, rgba); |
+ src8 += 48; |
+ dst += 16; |
+ count -= 16; |
+ } |
+ |
+ if (count >= 8) { |
+ // Load 8 pixels. |
+ uint8x8x3_t bgr = vld3_u8(src8); |
+ |
+ // Swap r and b and insert an opaque alpha channel. |
+ uint8x8x4_t rgba; |
+ rgba.val[0] = bgr.val[2]; |
+ rgba.val[1] = bgr.val[1]; |
+ rgba.val[2] = bgr.val[0]; |
+ rgba.val[3] = vdup_n_u8(0xFF); |
+ |
+ // Store 8 pixels. |
+ vst4_u8((uint8_t*) dst, rgba); |
+ src8 += 24; |
+ dst += 8; |
+ count -= 8; |
+ } |
+ |
+ xxx_swaprb_xxxa_portable(dst, src, count); |
+} |
+ |
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
template <bool kSwapRB> |
@@ -264,6 +374,14 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
swaprb_xxxa_portable(dst, src, count); |
} |
+static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
+ xxx_xxxa_portable(dst, src, count); |
+} |
+ |
+static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
+ xxx_swaprb_xxxa_portable(dst, src, count); |
+} |
+ |
#else |
static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
@@ -278,6 +396,14 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
swaprb_xxxa_portable(dst, src, count); |
} |
+static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
+ xxx_xxxa_portable(dst, src, count); |
+} |
+ |
+static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
+ xxx_swaprb_xxxa_portable(dst, src, count); |
+} |
+ |
#endif |
} |