| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include <emmintrin.h> | 8 #include <emmintrin.h> |
| 9 #include "SkBitmapProcState_opts_SSE2.h" | 9 #include "SkBitmapProcState_opts_SSE2.h" |
| 10 #include "SkBlitRow_opts_SSE2.h" | 10 #include "SkBlitRow_opts_SSE2.h" |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 60 } | 60 } |
| 61 | 61 |
| 62 while (count > 0) { | 62 while (count > 0) { |
| 63 *dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale); | 63 *dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale); |
| 64 src++; | 64 src++; |
| 65 dst++; | 65 dst++; |
| 66 count--; | 66 count--; |
| 67 } | 67 } |
| 68 } | 68 } |
| 69 | 69 |
| 70 void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, | |
| 71 const SkPMColor* SK_RESTRICT src, | |
| 72 int count, U8CPU alpha) { | |
| 73 sk_msan_assert_initialized(src, src+count); | |
| 74 | |
| 75 SkASSERT(alpha == 255); | |
| 76 if (count <= 0) { | |
| 77 return; | |
| 78 } | |
| 79 | |
| 80 int count16 = count / 16; | |
| 81 __m128i* dst4 = (__m128i*)dst; | |
| 82 const __m128i* src4 = (const __m128i*)src; | |
| 83 | |
| 84 for (int i = 0; i < count16 * 4; i += 4) { | |
| 85 // Load 16 source pixels using four unaligned-safe 128-bit loads. | |
| 86 __m128i s0 = _mm_loadu_si128(src4+i+0), | |
| 87 s1 = _mm_loadu_si128(src4+i+1), | |
| 88 s2 = _mm_loadu_si128(src4+i+2), | |
| 89 s3 = _mm_loadu_si128(src4+i+3); | |
| 90 | |
| 91 const __m128i alphaMask = _mm_set1_epi32(0xFF << SK_A32_SHIFT); | |
| 92 const __m128i ORed = _mm_or_si128(s3, _mm_or_si128(s2, _mm_or_si128(s1,
s0))); | |
| 93 __m128i cmp = _mm_cmpeq_epi8(_mm_and_si128(ORed, alphaMask), _mm_setzero
_si128()); | |
| 94 if (0xffff == _mm_movemask_epi8(cmp)) { | |
| 95 // All 16 source pixels are fully transparent. There's nothing to do
! | |
| 96 continue; | |
| 97 } | |
| 98 const __m128i ANDed = _mm_and_si128(s3, _mm_and_si128(s2, _mm_and_si128(
s1, s0))); | |
| 99 cmp = _mm_cmpeq_epi8(_mm_and_si128(ANDed, alphaMask), alphaMask); | |
| 100 if (0xffff == _mm_movemask_epi8(cmp)) { | |
| 101 // All 16 source pixels are fully opaque. There's no need to read ds
t or blend it. | |
| 102 _mm_storeu_si128(dst4+i+0, s0); | |
| 103 _mm_storeu_si128(dst4+i+1, s1); | |
| 104 _mm_storeu_si128(dst4+i+2, s2); | |
| 105 _mm_storeu_si128(dst4+i+3, s3); | |
| 106 continue; | |
| 107 } | |
| 108 // The general slow case: load dst and blend all 16 pixels with SkPMSrcOver_SSE2. | |
| 109 _mm_storeu_si128(dst4+i+0, SkPMSrcOver_SSE2(s0, _mm_loadu_si128(dst4+i+0
))); | |
| 110 _mm_storeu_si128(dst4+i+1, SkPMSrcOver_SSE2(s1, _mm_loadu_si128(dst4+i+1
))); | |
| 111 _mm_storeu_si128(dst4+i+2, SkPMSrcOver_SSE2(s2, _mm_loadu_si128(dst4+i+2
))); | |
| 112 _mm_storeu_si128(dst4+i+3, SkPMSrcOver_SSE2(s3, _mm_loadu_si128(dst4+i+3
))); | |
| 113 } | |
| 114 | |
| 115 // Wrap up the last <= 15 pixels one at a time with the scalar SkPMSrcOver. | |
| 116 SkASSERT(count - (count16*16) <= 15); | |
| 117 for (int i = count16*16; i < count; i++) { | |
| 118 // This check is not really necessary, but it prevents pointless autov
ectorization. | |
| 119 if (src[i] & 0xFF000000) { | |
| 120 dst[i] = SkPMSrcOver(src[i], dst[i]); | |
| 121 } | |
| 122 } | |
| 123 } | |
| 124 | |
| 125 void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, | 70 void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, |
| 126 const SkPMColor* SK_RESTRICT src, | 71 const SkPMColor* SK_RESTRICT src, |
| 127 int count, U8CPU alpha) { | 72 int count, U8CPU alpha) { |
| 128 SkASSERT(alpha <= 255); | 73 SkASSERT(alpha <= 255); |
| 129 if (count <= 0) { | 74 if (count <= 0) { |
| 130 return; | 75 return; |
| 131 } | 76 } |
| 132 | 77 |
| 133 if (count >= 4) { | 78 if (count >= 4) { |
| 134 while (((size_t)dst & 0x0F) != 0) { | 79 while (((size_t)dst & 0x0F) != 0) { |
| (...skipping 906 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1041 uint32_t dst_expanded = SkExpand_rgb_16(*dst); | 986 uint32_t dst_expanded = SkExpand_rgb_16(*dst); |
| 1042 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); | 987 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); |
| 1043 // now src and dst expanded are in g:11 r:10 x:1 b:10 | 988 // now src and dst expanded are in g:11 r:10 x:1 b:10 |
| 1044 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); | 989 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); |
| 1045 } | 990 } |
| 1046 dst += 1; | 991 dst += 1; |
| 1047 DITHER_INC_X(x); | 992 DITHER_INC_X(x); |
| 1048 } while (--count != 0); | 993 } while (--count != 0); |
| 1049 } | 994 } |
| 1050 } | 995 } |
| OLD | NEW |