Chromium Code Reviews| Index: src/opts/SkSwizzler_opts.h |
| diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h |
| index b0cf4cad5324eae5eb64fcce75eb755d253a02d5..baef30d4ddcae5ddbc20ad32246fdceb71ab6fac 100644 |
| --- a/src/opts/SkSwizzler_opts.h |
| +++ b/src/opts/SkSwizzler_opts.h |
| @@ -60,6 +60,34 @@ static void swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count |
| } |
| } |
| +static void xxx_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) { /* Expands count pixels, read as 3 consecutive bytes each from src, into 32-bit words in dst with the top byte forced to 0xFF. */ |

mtklein
2016/01/22 14:37:32
src is three-byte right, not some sort of RGBx? S
msarett
2016/01/22 15:00:36
Agreed. I made a similar comment in Patch Set 1.
mtklein
2016/01/22 15:23:57
Good. Was going to suggest that. :)
While they'r
msarett
2016/01/22 17:27:23
Done.
|
| + int i8 = 0; /* running byte offset into src8; advances by 3 per pixel */ |
| + const uint8_t* src8 = (const uint8_t*) src; /* src is declared as uint32_t[] but is read byte by byte */ |
| + for (int i32 = 0; i32 < count; i32++) { |
| + uint8_t b = src8[i8++], |
| + g = src8[i8++], |
| + r = src8[i8++]; |
| + dst[i32] = (uint32_t) b << 0 /* first source byte -> bits 0-7 */ |

mtklein
2016/01/22 14:37:32
Let's keep our order consistent with the rest of t
msarett
2016/01/22 15:00:36
Done.
|
| + | (uint32_t) g << 8 /* second source byte -> bits 8-15 */ |
| + | (uint32_t) r << 16 /* third source byte -> bits 16-23 */ |
| + | (uint32_t)0xFF << 24; /* constant 0xFF -> bits 24-31 */ |
| + } |
| +} |
| + |
| +static void xxx_swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) { /* Expands count pixels, read as 3 consecutive bytes each from src, into 32-bit words in dst, exchanging the first and third byte and forcing the top byte to 0xFF. */ |
| + int i8 = 0; /* running byte offset into src8; advances by 3 per pixel */ |
| + const uint8_t* src8 = (const uint8_t*) src; /* src is declared as uint32_t[] but is read byte by byte */ |
| + for (int i32 = 0; i32 < count; i32++) { |
| + uint8_t b = src8[i8++], |
| + g = src8[i8++], |
| + r = src8[i8++]; |
| + dst[i32] = (uint32_t) r << 0 /* third source byte -> bits 0-7 */ |
| + | (uint32_t) g << 8 /* second source byte -> bits 8-15 */ |
| + | (uint32_t) b << 16 /* first source byte -> bits 16-23 */ |
| + | (uint32_t)0xFF << 24; /* constant 0xFF -> bits 24-31 */ |
| + } |
| +} |
| + |
| #if defined(SK_ARM_HAS_NEON) |
| // Rounded divide by 255, (x + 127) / 255 |
| @@ -168,6 +196,68 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| swaprb_xxxa_portable(dst, src, count); |
| } |
| +template <bool kSwapRB> |
| +static void xxx_xxxa_should_swaprb(uint32_t dst[], const uint32_t src[], int count) { /* NEON version: converts count 3-byte pixels from src into 4-byte pixels in dst with a 0xFF fourth channel; kSwapRB exchanges channels 0 and 2. Works in steps of 16, then 8, then hands the remainder to the portable routine. */ |
| + const uint8_t* src8 = (const uint8_t*) src; /* src is declared as uint32_t[] but is consumed as bytes */ |
| + while (count >= 16) { |
| + // Load 16 pixels. |
| + uint8x16x3_t bgr = vld3q_u8(src8); /* reads 48 bytes, de-interleaved into three 16-lane vectors */ |
| + |
| + // Insert an opaque alpha channel and swap if needed. |
| + uint8x16x4_t bgra; |
| + if (kSwapRB) { |
| + bgra.val[0] = bgr.val[2]; |
| + bgra.val[2] = bgr.val[0]; |
| + } else { |
| + bgra.val[0] = bgr.val[0]; |
| + bgra.val[2] = bgr.val[2]; |
| + } |
| + bgra.val[1] = bgr.val[1]; |
| + bgra.val[3] = vdupq_n_u8(0xFF); /* every lane of the fourth channel is 0xFF */ |
| + |
| + // Store 16 pixels. |
| + vst4q_u8((uint8_t*) dst, bgra); /* interleaving store: writes 64 bytes */ |
| + src8 += 48; /* 16 pixels * 3 source bytes */ |

mtklein
2016/01/22 14:37:33
might write this as += 16*3?
I find it really ple
msarett
2016/01/22 15:00:36
Done.
|
| + dst += 16; /* 16 output pixels, one uint32_t each */ |
| + count -= 16; |
| + } |
| + |
| + if (count >= 8) { /* count < 16 here, so at most one 8-pixel step is needed */ |
| + // Load 8 pixels. |
| + uint8x8x3_t bgr = vld3_u8(src8); /* reads 24 bytes, de-interleaved into three 8-lane vectors */ |
| + |
| + // Insert an opaque alpha channel and swap if needed. |
| + uint8x8x4_t bgra; |
| + if (kSwapRB) { |
| + bgra.val[0] = bgr.val[2]; |
| + bgra.val[2] = bgr.val[0]; |
| + } else { |
| + bgra.val[0] = bgr.val[0]; |
| + bgra.val[2] = bgr.val[2]; |
| + } |
| + bgra.val[1] = bgr.val[1]; |
| + bgra.val[3] = vdup_n_u8(0xFF); /* every lane of the fourth channel is 0xFF */ |
| + |
| + // Store 8 pixels. |
| + vst4_u8((uint8_t*) dst, bgra); /* interleaving store: writes 32 bytes */ |
| + src8 += 24; /* 8 pixels * 3 source bytes */ |
| + dst += 8; |
| + count -= 8; |
| + } |
| + |
| + // Call portable code to finish up the tail of [0,8) pixels. |
| + auto proc = kSwapRB ? xxx_swaprb_xxxa_portable : xxx_xxxa_portable; /* portable routine matching the swap mode */ |
| + proc(dst, (const uint32_t*) src8, count); /* cast back only to match the portable signature; the callee reads bytes */ |
| +} |
| + |
| +static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { /* Non-swapping entry point: forwards to the templated implementation with kSwapRB = false. */ |
| + xxx_xxxa_should_swaprb<false>(dst, src, count); |
| +} |
| + |
| +static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { /* Swapping entry point: forwards to the templated implementation with kSwapRB = true. */ |
| + xxx_xxxa_should_swaprb<true>(dst, src, count); |
| +} |
| + |
| #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| template <bool kSwapRB> |
| @@ -264,6 +354,14 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| swaprb_xxxa_portable(dst, src, count); |
| } |
| +static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { /* Non-swapping entry point for this preprocessor branch: forwards unchanged to the portable routine. */ |
| + xxx_xxxa_portable(dst, src, count); |
| +} |
| + |
| +static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { /* Swapping entry point for this preprocessor branch: forwards unchanged to the portable routine. */ |
| + xxx_swaprb_xxxa_portable(dst, src, count); |
| +} |
| + |
| #else |
| static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| @@ -278,6 +376,14 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| swaprb_xxxa_portable(dst, src, count); |
| } |
| +static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { /* Non-swapping entry point for the #else fallback branch: forwards unchanged to the portable routine. */ |
| + xxx_xxxa_portable(dst, src, count); |
| +} |
| + |
| +static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { /* Swapping entry point for the #else fallback branch: forwards unchanged to the portable routine. */ |
| + xxx_swaprb_xxxa_portable(dst, src, count); |
| +} |
| + |
| #endif |
| } |