Index: src/opts/SkBlitRow_opts_SSE2.cpp |
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp |
index 47651c460bcd683b9cbd4cfc8998e9ae244c1dc1..9e99b4bc4654da338c8fa0d92ddb5b728131497d 100644 |
--- a/src/opts/SkBlitRow_opts_SSE2.cpp |
+++ b/src/opts/SkBlitRow_opts_SSE2.cpp |
@@ -853,6 +853,83 @@ void SkBlitLCD16OpaqueRow_SSE2(SkPMColor dst[], const uint16_t mask[], |
} |
} |
+/* SSE2 version of S32_D565_Opaque() |
+ * portable version is in core/SkBlitRow_D16.cpp |
+ * |
+ * Converts count 32-bit SkPMColor pixels read from src into 16-bit pixels |
+ * written to dst (presumably RGB565, judging by the function name and the |
+ * SK_R16/SK_G16/SK_B16 macros; their values are not visible here). |
+ * |
+ * Parameters: |
+ *   dst    destination row of 16-bit pixels; advanced as pixels are written. |
+ *   src    source row of 32-bit SkPMColor pixels; advanced as pixels are read. |
+ *   count  number of pixels to convert; values <= 0 return immediately. |
+ *   alpha  asserted (SkASSERT) to be 255 and otherwise unused. |
+ *   x, y   unnamed and unused. |
+ * |
+ * Structure: |
+ *   1. If count >= 8, pixels are converted one at a time with |
+ *      SkPixel32ToPixel16_ToU16() until dst is 16-byte aligned. |
+ *   2. While at least 8 pixels remain, 8 src pixels are fetched with two |
+ *      unaligned 128-bit loads, each channel is shifted down and masked to its |
+ *      16-bit-format width, the two halves are packed into eight 16-bit lanes |
+ *      per channel, and SkPackRGB16_SSE() combines r, g and b into the 8 |
+ *      destination pixels, which are written with one aligned 128-bit store. |
+ *   3. Any remaining pixels (fewer than 8) are converted one at a time. |
+ * |
+ * NOTE(review): the alignment loop in step 1 does not test count. It ends |
+ * within 7 iterations only if dst is 2-byte aligned on entry, which is assumed |
+ * to hold for a uint16_t pointer; since count >= 8 there, count stays >= 1. |
+ * |
+ * NOTE(review): _mm_packs_epi32 packs with signed saturation; the preceding |
+ * AND with SK_*16_MASK presumably keeps every lane small enough that no |
+ * saturation occurs -- confirm against the mask definitions. |
+ */ |
+void S32_D565_Opaque_SSE2(uint16_t* SK_RESTRICT dst, |
+ const SkPMColor* SK_RESTRICT src, int count, |
+ U8CPU alpha, int /*x*/, int /*y*/) { |
+ SkASSERT(255 == alpha); |
+ |
+ if (count <= 0) { |
+ return; |
+ } |
+ |
+ if (count >= 8) { /* the SIMD path is attempted only for 8 or more pixels */ |
+ while (((size_t)dst & 0x0F) != 0) { /* scalar until dst is 16-byte aligned */ |
+ SkPMColor c = *src++; |
+ SkPMColorAssert(c); |
+ |
+ *dst++ = SkPixel32ToPixel16_ToU16(c); |
+ count--; |
+ } |
+ |
+ const __m128i* s = reinterpret_cast<const __m128i*>(src); |
+ __m128i* d = reinterpret_cast<__m128i*>(dst); |
+ __m128i r16_mask = _mm_set1_epi32(SK_R16_MASK); |
+ __m128i g16_mask = _mm_set1_epi32(SK_G16_MASK); |
+ __m128i b16_mask = _mm_set1_epi32(SK_B16_MASK); |
+ |
+ while (count >= 8) { |
+ // Load 8 pixels of src (two unaligned 128-bit loads; only dst was aligned above). |
+ __m128i src_pixel1 = _mm_loadu_si128(s++); |
+ __m128i src_pixel2 = _mm_loadu_si128(s++); |
+ |
+ // Calculate result r: shift each 32-bit lane, mask, then pack both halves to 16-bit lanes. |
+ __m128i r1 = _mm_srli_epi32(src_pixel1, |
+ SK_R32_SHIFT + (8 - SK_R16_BITS)); |
+ r1 = _mm_and_si128(r1, r16_mask); |
+ __m128i r2 = _mm_srli_epi32(src_pixel2, |
+ SK_R32_SHIFT + (8 - SK_R16_BITS)); |
+ r2 = _mm_and_si128(r2, r16_mask); |
+ __m128i r = _mm_packs_epi32(r1, r2); |
+ |
+ // Calculate result g (same shift/mask/pack sequence with the G constants). |
+ __m128i g1 = _mm_srli_epi32(src_pixel1, |
+ SK_G32_SHIFT + (8 - SK_G16_BITS)); |
+ g1 = _mm_and_si128(g1, g16_mask); |
+ __m128i g2 = _mm_srli_epi32(src_pixel2, |
+ SK_G32_SHIFT + (8 - SK_G16_BITS)); |
+ g2 = _mm_and_si128(g2, g16_mask); |
+ __m128i g = _mm_packs_epi32(g1, g2); |
+ |
+ // Calculate result b (same shift/mask/pack sequence with the B constants). |
+ __m128i b1 = _mm_srli_epi32(src_pixel1, |
+ SK_B32_SHIFT + (8 - SK_B16_BITS)); |
+ b1 = _mm_and_si128(b1, b16_mask); |
+ __m128i b2 = _mm_srli_epi32(src_pixel2, |
+ SK_B32_SHIFT + (8 - SK_B16_BITS)); |
+ b2 = _mm_and_si128(b2, b16_mask); |
+ __m128i b = _mm_packs_epi32(b1, b2); |
+ |
+ // Store 8 16-bit colors in dst. |
+ __m128i d_pixel = SkPackRGB16_SSE(r, g, b); |
+ _mm_store_si128(d++, d_pixel); /* aligned store; dst was aligned by the loop above */ |
+ count -= 8; |
+ } |
+ src = reinterpret_cast<const SkPMColor*>(s); /* hand the advanced pointers to the scalar tail */ |
+ dst = reinterpret_cast<uint16_t*>(d); |
+ } |
+ |
+ if (count > 0) { /* fewer than 8 pixels remain at this point */ |
+ do { |
+ SkPMColor c = *src++; |
+ SkPMColorAssert(c); |
+ *dst++ = SkPixel32ToPixel16_ToU16(c); |
+ } while (--count != 0); |
+ } |
+} |
+ |
/* SSE2 version of S32A_D565_Opaque() |
* portable version is in core/SkBlitRow_D16.cpp |
*/ |