Index: src/opts/SkSwizzler_opts.h |
diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h |
index ad121cfafecc42d33fddaa2227ed9fd0693e659b..14960f3b8f0b36ab09fd0dd49f3da55509347e30 100644 |
--- a/src/opts/SkSwizzler_opts.h |
+++ b/src/opts/SkSwizzler_opts.h |
@@ -358,12 +358,47 @@ static void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) { |
RGBA_to_BGRA_portable(dst, src, count); |
} |
+template <bool kSwapRB> |
+static void insert_alpha_should_swaprb(uint32_t dst[], const void* vsrc, int count) { |
+ const uint8_t* src = (const uint8_t*) vsrc; |
+ |
+ const __m128i alphaMask = _mm_set1_epi32(0xFF000000); |
+ __m128i expand; |
+ const uint8_t X = 0xFF; // Used a placeholder. The value of X is irrelevant. |
+ if (kSwapRB) { |
+ expand = _mm_setr_epi8(2,1,0,X, 5,4,3,X, 8,7,6,X, 11,10,9,X); |
+ } else { |
+ expand = _mm_setr_epi8(0,1,2,X, 3,4,5,X, 6,7,8,X, 9,10,11,X); |
+ } |
+ |
+ while (count >= 6) { |
+ // Load a vector. While this actually contains 5 pixels plus an |
+ // extra component, we will discard all but the first four pixels on |
+ // this iteration. |
+ __m128i rgb = _mm_loadu_si128((const __m128i*) src); |
+ |
+ // Expand the first four pixels to RGBX and then mask to RGB(FF). |
+ __m128i rgba = _mm_or_si128(_mm_shuffle_epi8(rgb, expand), alphaMask); |
+ |
+ // Store 4 pixels. |
+ _mm_storeu_si128((__m128i*) dst, rgba); |
+ |
+ src += 4*3; |
+ dst += 4; |
+ count -= 4; |
+ } |
+ |
+ // Call portable code to finish up the tail of [0,4) pixels. |
+ auto proc = kSwapRB ? RGB_to_BGR1_portable : RGB_to_RGB1_portable; |
+ proc(dst, src, count); |
+} |
+ |
static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { |
- RGB_to_RGB1_portable(dst, src, count); |
+ insert_alpha_should_swaprb<false>(dst, src, count); |
} |
static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { |
- RGB_to_BGR1_portable(dst, src, count); |
+ insert_alpha_should_swaprb<true>(dst, src, count); |
} |
#else |