| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 | 8 |
| 9 #include "SkBlitRow_opts_SSE2.h" | 9 #include "SkBlitRow_opts_SSE2.h" |
| 10 #include "SkBitmapProcState_opts_SSE2.h" | 10 #include "SkBitmapProcState_opts_SSE2.h" |
| (...skipping 835 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 846 } | 846 } |
| 847 | 847 |
| 848 while (width > 0) { | 848 while (width > 0) { |
| 849 *dst = SkBlendLCD16Opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst); | 849 *dst = SkBlendLCD16Opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst); |
| 850 mask++; | 850 mask++; |
| 851 dst++; | 851 dst++; |
| 852 width--; | 852 width--; |
| 853 } | 853 } |
| 854 } | 854 } |
| 855 | 855 |
| 856 /* SSE2 version of S32_D565_Opaque() |
| 857 * portable version is in core/SkBlitRow_D16.cpp |
| 858 */ |
| 859 void S32_D565_Opaque_SSE2(uint16_t* SK_RESTRICT dst, |
| 860 const SkPMColor* SK_RESTRICT src, int count, |
| 861 U8CPU alpha, int /*x*/, int /*y*/) { |
| 862 SkASSERT(255 == alpha); |
| 863 |
| 864 if (count <= 0) { |
| 865 return; |
| 866 } |
| 867 |
| 868 if (count >= 8) { |
| 869 while (((size_t)dst & 0x0F) != 0) { |
| 870 SkPMColor c = *src++; |
| 871 SkPMColorAssert(c); |
| 872 |
| 873 *dst++ = SkPixel32ToPixel16_ToU16(c); |
| 874 count--; |
| 875 } |
| 876 |
| 877 const __m128i* s = reinterpret_cast<const __m128i*>(src); |
| 878 __m128i* d = reinterpret_cast<__m128i*>(dst); |
| 879 __m128i r16_mask = _mm_set1_epi32(SK_R16_MASK); |
| 880 __m128i g16_mask = _mm_set1_epi32(SK_G16_MASK); |
| 881 __m128i b16_mask = _mm_set1_epi32(SK_B16_MASK); |
| 882 |
| 883 while (count >= 8) { |
| 884 // Load 8 pixels of src. |
| 885 __m128i src_pixel1 = _mm_loadu_si128(s++); |
| 886 __m128i src_pixel2 = _mm_loadu_si128(s++); |
| 887 |
| 888 // Calculate result r. |
| 889 __m128i r1 = _mm_srli_epi32(src_pixel1, |
| 890 SK_R32_SHIFT + (8 - SK_R16_BITS)); |
| 891 r1 = _mm_and_si128(r1, r16_mask); |
| 892 __m128i r2 = _mm_srli_epi32(src_pixel2, |
| 893 SK_R32_SHIFT + (8 - SK_R16_BITS)); |
| 894 r2 = _mm_and_si128(r2, r16_mask); |
| 895 __m128i r = _mm_packs_epi32(r1, r2); |
| 896 |
| 897 // Calculate result g. |
| 898 __m128i g1 = _mm_srli_epi32(src_pixel1, |
| 899 SK_G32_SHIFT + (8 - SK_G16_BITS)); |
| 900 g1 = _mm_and_si128(g1, g16_mask); |
| 901 __m128i g2 = _mm_srli_epi32(src_pixel2, |
| 902 SK_G32_SHIFT + (8 - SK_G16_BITS)); |
| 903 g2 = _mm_and_si128(g2, g16_mask); |
| 904 __m128i g = _mm_packs_epi32(g1, g2); |
| 905 |
| 906 // Calculate result b. |
| 907 __m128i b1 = _mm_srli_epi32(src_pixel1, |
| 908 SK_B32_SHIFT + (8 - SK_B16_BITS)); |
| 909 b1 = _mm_and_si128(b1, b16_mask); |
| 910 __m128i b2 = _mm_srli_epi32(src_pixel2, |
| 911 SK_B32_SHIFT + (8 - SK_B16_BITS)); |
| 912 b2 = _mm_and_si128(b2, b16_mask); |
| 913 __m128i b = _mm_packs_epi32(b1, b2); |
| 914 |
| 915 // Store 8 16-bit colors in dst. |
| 916 __m128i d_pixel = SkPackRGB16_SSE(r, g, b); |
| 917 _mm_store_si128(d++, d_pixel); |
| 918 count -= 8; |
| 919 } |
| 920 src = reinterpret_cast<const SkPMColor*>(s); |
| 921 dst = reinterpret_cast<uint16_t*>(d); |
| 922 } |
| 923 |
| 924 if (count > 0) { |
| 925 do { |
| 926 SkPMColor c = *src++; |
| 927 SkPMColorAssert(c); |
| 928 *dst++ = SkPixel32ToPixel16_ToU16(c); |
| 929 } while (--count != 0); |
| 930 } |
| 931 } |
| 932 |
| 856 /* SSE2 version of S32A_D565_Opaque() | 933 /* SSE2 version of S32A_D565_Opaque() |
| 857 * portable version is in core/SkBlitRow_D16.cpp | 934 * portable version is in core/SkBlitRow_D16.cpp |
| 858 */ | 935 */ |
| 859 void S32A_D565_Opaque_SSE2(uint16_t* SK_RESTRICT dst, | 936 void S32A_D565_Opaque_SSE2(uint16_t* SK_RESTRICT dst, |
| 860 const SkPMColor* SK_RESTRICT src, | 937 const SkPMColor* SK_RESTRICT src, |
| 861 int count, U8CPU alpha, int /*x*/, int /*y*/) { | 938 int count, U8CPU alpha, int /*x*/, int /*y*/) { |
| 862 SkASSERT(255 == alpha); | 939 SkASSERT(255 == alpha); |
| 863 | 940 |
| 864 if (count <= 0) { | 941 if (count <= 0) { |
| 865 return; | 942 return; |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 967 do { | 1044 do { |
| 968 SkPMColor c = *src++; | 1045 SkPMColor c = *src++; |
| 969 SkPMColorAssert(c); | 1046 SkPMColorAssert(c); |
| 970 if (c) { | 1047 if (c) { |
| 971 *dst = SkSrcOver32To16(c, *dst); | 1048 *dst = SkSrcOver32To16(c, *dst); |
| 972 } | 1049 } |
| 973 dst += 1; | 1050 dst += 1; |
| 974 } while (--count != 0); | 1051 } while (--count != 0); |
| 975 } | 1052 } |
| 976 } | 1053 } |
| OLD | NEW |