Index: src/opts/SkSwizzler_opts.h |
diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h |
index 467e5d1940624d6571c7561cafc26e6c8a776fa0..c7edcb9ece068ec29b321b95295163e908b6f004 100644 |
--- a/src/opts/SkSwizzler_opts.h |
+++ b/src/opts/SkSwizzler_opts.h |
@@ -47,6 +47,19 @@ static void premul_swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], in |
} |
} |
+static void swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) { /* Scalar path: writes each of the count 32-bit pixels of src to dst with bits 16-23 and bits 0-7 exchanged. */ |
+ for (int i = 0; i < count; i++) { |
+ uint8_t a = src[i] >> 24, /* bits 24-31; converting to uint8_t keeps only the low byte of each shifted value */ |
+ r = src[i] >> 16, /* bits 16-23 */ |
+ g = src[i] >> 8, /* bits 8-15 */ |
+ b = src[i] >> 0; /* bits 0-7 */ |
+ dst[i] = (uint32_t)a << 24 /* a stays in the top byte */ |
+ | (uint32_t)b << 16 /* b moves up to bits 16-23 */ |
+ | (uint32_t)g << 8 /* g stays in bits 8-15 */ |
+ | (uint32_t)r << 0; /* r moves down to bits 0-7 */ |
+ } |
+} |
+ |
#if defined(SK_ARM_HAS_NEON) |
// Rounded divide by 255, (x + 127) / 255 |
@@ -123,6 +136,44 @@ static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) |
premul_xxxa_should_swaprb<true>(dst, src, count); |
} |
+static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { /* NEON path: copies count 32-bit pixels from src to dst, exchanging the first and third byte (memory order) of each pixel. */ |
+ while (count >= 16) { |
+ // Load 16 pixels (64 bytes); vld4q_u8 de-interleaves them so val[k] holds byte k of every pixel. |
+ uint8x16x4_t bgra = vld4q_u8((const uint8_t*) src); |
+ |
+ // Swap r and b: exchange planes 0 and 2; planes 1 and 3 pass through untouched. |
+ uint8x16_t r = bgra.val[2], |
msarett
2016/01/14 17:08:25
No matter how I write this code, clang really want
|
+ b = bgra.val[0]; |
+ bgra.val[2] = b; |
+ bgra.val[0] = r; |
+ |
+ // Store 16 pixels; vst4q_u8 re-interleaves the four planes back into pixel order. |
+ vst4q_u8((uint8_t*) dst, bgra); |
+ src += 16; /* uint32_t pointers, so this steps over the 16 pixels just processed */ |
+ dst += 16; |
+ count -= 16; |
+ } |
+ |
+ while (count >= 8) { /* count is below 16 here, so this body runs at most once */ |
+ // Load 8 pixels (32 bytes), de-interleaved the same way into 8-lane planes. |
+ uint8x8x4_t bgra = vld4_u8((const uint8_t*) src); |
+ |
+ // Swap r and b: same plane exchange as the 16-pixel loop. |
+ uint8x8_t r = bgra.val[2], |
+ b = bgra.val[0]; |
+ bgra.val[2] = b; |
+ bgra.val[0] = r; |
+ |
+ // Store 8 pixels, re-interleaved. |
+ vst4_u8((uint8_t*) dst, bgra); |
+ src += 8; |
+ dst += 8; |
+ count -= 8; |
+ } |
+ |
+ swaprb_xxxa_portable(dst, src, count); /* scalar loop finishes the remaining pixels (fewer than 8) */ |
+} |
+ |
#else |
static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
@@ -133,21 +184,12 @@ static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) |
premul_swaprb_xxxa_portable(dst, src, count); |
} |
-#endif |
- |
static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
- for (int i = 0; i < count; i++) { |
- uint8_t a = src[i] >> 24, |
- r = src[i] >> 16, |
- g = src[i] >> 8, |
- b = src[i] >> 0; |
- dst[i] = (uint32_t)a << 24 |
- | (uint32_t)b << 16 |
- | (uint32_t)g << 8 |
- | (uint32_t)r << 0; |
- } |
+ swaprb_xxxa_portable(dst, src, count); /* #else branch of SK_ARM_HAS_NEON: no vector path here, so forward to the scalar loop */ |
} |
+#endif |
+ |
} |
#endif // SkSwizzler_opts_DEFINED |