Chromium Code Reviews| Index: src/opts/SkSwizzler_opts.h |
| diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h |
| index b0cf4cad5324eae5eb64fcce75eb755d253a02d5..8e1de35ecbc44dc4f4f2037d63fa6064b70f713e 100644 |
| --- a/src/opts/SkSwizzler_opts.h |
| +++ b/src/opts/SkSwizzler_opts.h |
| @@ -60,6 +60,34 @@ static void swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count |
| } |
| } |
// Expands `count` packed 3-byte pixels into 4-byte pixels with an opaque
// fourth byte.
//
// dst:   receives `count` 32-bit pixels. For each pixel, source byte 0 lands in
//        bits 0-7, byte 1 in bits 8-15, byte 2 in bits 16-23, and bits 24-31
//        are set to 0xFF.
// src:   typed as uint32_t (the same signature as swaprb_xxxa_portable), but it
//        is read as a packed byte stream of 3 * count bytes.
// count: number of pixels to convert; nothing is written when count <= 0.
//
// Review note (msarett, 2016/01/21 22:09:07; text is truncated in the source):
//   "It's a little strange that we pass in src as a poi..."
static void xxx_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    // Keep the const qualifier while reinterpreting the input as bytes.
    const uint8_t* src8 = (const uint8_t*) src;
    for (int i = 0; i < count; i++) {
        // Advance the byte pointer rather than an int byte index: a byte index
        // reaches 3 * count, which overflows int when count > INT_MAX / 3.
        const uint8_t b = src8[0];
        const uint8_t g = src8[1];
        const uint8_t r = src8[2];
        src8 += 3;

        dst[i] = (uint32_t) b    <<  0
               | (uint32_t) g    <<  8
               | (uint32_t) r    << 16
               | (uint32_t) 0xFF << 24;
    }
}
| + |
// Expands `count` packed 3-byte pixels into 4-byte pixels with an opaque
// fourth byte, exchanging the first and third byte of every pixel.
//
// dst:   receives `count` 32-bit pixels. For each pixel, source byte 2 lands in
//        bits 0-7, byte 1 in bits 8-15, byte 0 in bits 16-23, and bits 24-31
//        are set to 0xFF.
// src:   typed as uint32_t, but read as a packed byte stream of 3 * count bytes.
// count: number of pixels to convert; nothing is written when count <= 0.
static void xxx_swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    // Keep the const qualifier while reinterpreting the input as bytes.
    const uint8_t* src8 = (const uint8_t*) src;
    for (int i = 0; i < count; i++) {
        // Advance the byte pointer rather than an int byte index: a byte index
        // reaches 3 * count, which overflows int when count > INT_MAX / 3.
        const uint8_t b = src8[0];
        const uint8_t g = src8[1];
        const uint8_t r = src8[2];
        src8 += 3;

        dst[i] = (uint32_t) r    <<  0
               | (uint32_t) g    <<  8
               | (uint32_t) b    << 16
               | (uint32_t) 0xFF << 24;
    }
}
| + |
| #if defined(SK_ARM_HAS_NEON) |
| // Rounded divide by 255, (x + 127) / 255 |
| @@ -168,6 +196,88 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| swaprb_xxxa_portable(dst, src, count); |
| } |
| +static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| + const uint8_t* src8 = (const uint8_t*) src; |
| + while (count >= 16) { |
| + // Load 16 pixels. |
| + uint8x16x3_t bgr = vld3q_u8(src8); |
| + |
| + // Insert an opaque alpha channel. |
| + uint8x16x4_t bgra; |
| + bgra.val[0] = bgr.val[0]; |
| + bgra.val[1] = bgr.val[1]; |
| + bgra.val[2] = bgr.val[2]; |
| + bgra.val[3] = vdupq_n_u8(0xFF); |
| + |
| + // Store 16 pixels. |
| + vst4q_u8((uint8_t*) dst, bgra); |
| + src8 += 48; |
| + dst += 16; |
| + count -= 16; |
| + } |
| + |
| + if (count >= 8) { |
| + // Load 8 pixels. |
| + uint8x8x3_t bgr = vld3_u8(src8); |
| + |
| + // Insert an opaque alpha channel. |
| + uint8x8x4_t bgra; |
| + bgra.val[0] = bgr.val[0]; |
| + bgra.val[1] = bgr.val[1]; |
| + bgra.val[2] = bgr.val[2]; |
| + bgra.val[3] = vdup_n_u8(0xFF); |
| + |
| + // Store 8 pixels. |
| + vst4_u8((uint8_t*) dst, bgra); |
| + src8 += 24; |
| + dst += 8; |
| + count -= 8; |
| + } |
| + |
| + xxx_xxxa_portable(dst, src, count); |
| +} |
| + |
| +static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| + const uint8_t* src8 = (const uint8_t*) src; |
| + while (count >= 16) { |
| + // Load 16 pixels. |
| + uint8x16x3_t bgr = vld3q_u8(src8); |
| + |
| + // Swap r and b and insert an opaque alpha channel. |
| + uint8x16x4_t rgba; |
| + rgba.val[0] = bgr.val[2]; |
| + rgba.val[1] = bgr.val[1]; |
| + rgba.val[2] = bgr.val[0]; |
| + rgba.val[3] = vdupq_n_u8(0xFF); |
| + |
| + // Store 16 pixels. |
| + vst4q_u8((uint8_t*) dst, rgba); |
| + src8 += 48; |
| + dst += 16; |
| + count -= 16; |
| + } |
| + |
| + if (count >= 8) { |
| + // Load 8 pixels. |
| + uint8x8x3_t bgr = vld3_u8(src8); |
| + |
| + // Swap r and b and insert an opaque alpha channel. |
| + uint8x8x4_t rgba; |
| + rgba.val[0] = bgr.val[2]; |
| + rgba.val[1] = bgr.val[1]; |
| + rgba.val[2] = bgr.val[0]; |
| + rgba.val[3] = vdup_n_u8(0xFF); |
| + |
| + // Store 8 pixels. |
| + vst4_u8((uint8_t*) dst, rgba); |
| + src8 += 24; |
| + dst += 8; |
| + count -= 8; |
| + } |
| + |
| + xxx_swaprb_xxxa_portable(dst, src, count); |
| +} |
| + |
| #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| template <bool kSwapRB> |
| @@ -264,6 +374,14 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| swaprb_xxxa_portable(dst, src, count); |
| } |
| +static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| + xxx_xxxa_portable(dst, src, count); |
| +} |
| + |
| +static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| + xxx_swaprb_xxxa_portable(dst, src, count); |
| +} |
| + |
| #else |
| static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| @@ -278,6 +396,14 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| swaprb_xxxa_portable(dst, src, count); |
| } |
| +static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| + xxx_xxxa_portable(dst, src, count); |
| +} |
| + |
| +static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| + xxx_swaprb_xxxa_portable(dst, src, count); |
| +} |
| + |
| #endif |
| } |