Chromium Code Reviews| Index: src/opts/SkSwizzler_opts.h |
| diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h |
| index 467e5d1940624d6571c7561cafc26e6c8a776fa0..c7edcb9ece068ec29b321b95295163e908b6f004 100644 |
| --- a/src/opts/SkSwizzler_opts.h |
| +++ b/src/opts/SkSwizzler_opts.h |
| @@ -47,6 +47,19 @@ static void premul_swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], in |
| } |
| } |
| +// Portable (non-SIMD) red/blue swap. For each of the count pixels in src, writes |
| +// to dst the same 32-bit value with the byte in bits 16-23 and the byte in bits |
| +// 0-7 exchanged; bits 24-31 and bits 8-15 are copied through untouched. |
| +static void swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) { |
| +    int remaining = count; |
| +    while (remaining > 0) { |
| +        const uint32_t px = *src++; |
| +        // These two bytes stay where they are. |
| +        const uint32_t kept = px & 0xFF00FF00u; |
| +        // These two bytes trade places. |
| +        const uint32_t low_to_high = (px & 0x000000FFu) << 16; |
| +        const uint32_t high_to_low = (px >> 16) & 0x000000FFu; |
| +        *dst++ = kept | low_to_high | high_to_low; |
| +        remaining--; |
| +    } |
| +} |
| + |
| #if defined(SK_ARM_HAS_NEON) |
| // Rounded divide by 255, (x + 127) / 255 |
| @@ -123,6 +136,44 @@ static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) |
| premul_xxxa_should_swaprb<true>(dst, src, count); |
| } |
| +// NEON version of swaprb_xxxa. Handles 16 pixels per iteration while at least |
| +// 16 remain, then 8 pixels while at least 8 remain, and passes whatever is left |
| +// (fewer than 8 pixels) to swaprb_xxxa_portable. In each vector step the loaded |
| +// val[0] and val[2] lanes are exchanged; val[1] and val[3] are stored back as loaded. |
| +static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| + while (count >= 16) { |
| + // Load 16 pixels. |
| + uint8x16x4_t bgra = vld4q_u8((const uint8_t*) src); |
| + |
| + // Swap r and b. |
| + uint8x16_t r = bgra.val[2], |

msarett
2016/01/14 17:08:25
No matter how I write this code, clang really want
|
| + b = bgra.val[0]; |
| + bgra.val[2] = b; |
| + bgra.val[0] = r; |
| + |
| + // Store 16 pixels. |
| + vst4q_u8((uint8_t*) dst, bgra); |
| + // src and dst are uint32_t pointers, so += 16 steps past the 16 pixels just handled. |
| + src += 16; |
| + dst += 16; |
| + count -= 16; |
| + } |
| + |
| + // Fewer than 16 pixels remain here, so this loop body runs at most once. |
| + while (count >= 8) { |
| + // Load 8 pixels. |
| + uint8x8x4_t bgra = vld4_u8((const uint8_t*) src); |
| + |
| + // Swap r and b. |
| + uint8x8_t r = bgra.val[2], |
| + b = bgra.val[0]; |
| + bgra.val[2] = b; |
| + bgra.val[0] = r; |
| + |
| + // Store 8 pixels. |
| + vst4_u8((uint8_t*) dst, bgra); |
| + src += 8; |
| + dst += 8; |
| + count -= 8; |
| + } |
| + |
| + // Tail: the remaining pixels (fewer than 8) go through the scalar loop. |
| + swaprb_xxxa_portable(dst, src, count); |
| +} |
| + |
| #else |
| static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| @@ -133,21 +184,12 @@ static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) |
| premul_swaprb_xxxa_portable(dst, src, count); |
| } |
| -#endif |
| - |
| +// Version of swaprb_xxxa in the #else branch (presumably the build without |
| +// SK_ARM_HAS_NEON): forwards every pixel to swaprb_xxxa_portable. |
| static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| - for (int i = 0; i < count; i++) { |
| - uint8_t a = src[i] >> 24, |
| - r = src[i] >> 16, |
| - g = src[i] >> 8, |
| - b = src[i] >> 0; |
| - dst[i] = (uint32_t)a << 24 |
| - | (uint32_t)b << 16 |
| - | (uint32_t)g << 8 |
| - | (uint32_t)r << 0; |
| - } |
| + swaprb_xxxa_portable(dst, src, count); |
| } |
| +#endif |
| + |
| } |
| #endif // SkSwizzler_opts_DEFINED |