OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkBlitRow_opts_SSE4.h" | 8 #include "SkBlitRow_opts_SSE4.h" |
9 | 9 |
10 // Some compilers can't compile SSSE3 or SSE4 intrinsics. We give them stub met
hods. | 10 // Some compilers can't compile SSSE3 or SSE4 intrinsics. We give them stub met
hods. |
11 // The stubs should never be called, so we make them crash just to confirm that. | 11 // The stubs should never be called, so we make them crash just to confirm that. |
12 #if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSE41 | 12 #if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSE41 |
// Stub variant, compiled only when SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSE41: it ignores all of its arguments and just calls sk_throw().
13 void S32A_Opaque_BlitRow32_SSE4(SkPMColor* SK_RESTRICT, const SkPMColor* SK_REST
RICT, int, U8CPU) { | 13 void S32A_Opaque_BlitRow32_SSE4(SkPMColor* SK_RESTRICT, const SkPMColor* SK_REST
RICT, int, U8CPU) { |
14 sk_throw(); | 14 sk_throw(); |
15 } | 15 } |
16 | 16 |
17 #else | 17 #else |
18 | 18 |
19 #include <smmintrin.h> // SSE4.1 intrinsics | 19 #include <smmintrin.h> // SSE4.1 intrinsics |
20 #include "SkColorPriv.h" | 20 #include "SkColorPriv.h" |
21 #include "SkColor_opts_SSE2.h" | 21 #include "SkColor_opts_SSE2.h" |
| 22 #include "SkMSAN.h" |
22 | 23 |
23 void S32A_Opaque_BlitRow32_SSE4(SkPMColor* SK_RESTRICT dst, | 24 void S32A_Opaque_BlitRow32_SSE4(SkPMColor* SK_RESTRICT dst, |
24 const SkPMColor* SK_RESTRICT src, | 25 const SkPMColor* SK_RESTRICT src, |
25 int count, | 26 int count, |
26 U8CPU alpha) { | 27 U8CPU alpha) { |
| 28 sk_msan_assert_initialized(src, src+count); |
| 29 |
27 SkASSERT(alpha == 255); | 30 SkASSERT(alpha == 255); |
28 // As long as we can, we'll work on 16 pixel pairs at once. | 31 // As long as we can, we'll work on 16 pixel pairs at once. |
29 int count16 = count / 16; | 32 int count16 = count / 16; |
30 __m128i* dst4 = (__m128i*)dst; | 33 __m128i* dst4 = (__m128i*)dst; |
31 const __m128i* src4 = (const __m128i*)src; | 34 const __m128i* src4 = (const __m128i*)src; |
32 | 35 |
33 for (int i = 0; i < count16 * 4; i += 4) { | 36 for (int i = 0; i < count16 * 4; i += 4) { |
34 // Load 16 source pixels. | 37 // Load 16 source pixels. |
35 __m128i s0 = _mm_loadu_si128(src4+i+0), | 38 __m128i s0 = _mm_loadu_si128(src4+i+0), |
36 s1 = _mm_loadu_si128(src4+i+1), | 39 s1 = _mm_loadu_si128(src4+i+1), |
(...skipping 25 matching lines...) Expand all Loading... |
62 // Wrap up the last <= 15 pixels. | 65 // Wrap up the last <= 15 pixels. |
63 for (int i = count16*16; i < count; i++) { | 66 for (int i = count16*16; i < count; i++) { |
64 // This check is not really necessary, but it prevents pointless autov
ectorization. | 67 // This check is not really necessary, but it prevents pointless autov
ectorization. |
65 if (src[i] & 0xFF000000) { | 68 if (src[i] & 0xFF000000) { |
66 dst[i] = SkPMSrcOver(src[i], dst[i]); | 69 dst[i] = SkPMSrcOver(src[i], dst[i]); |
67 } | 70 } |
68 } | 71 } |
69 } | 72 } |
70 | 73 |
71 #endif | 74 #endif |
OLD | NEW |