OLD | NEW |
1 #include "SkBlitRow_opts_SSE4.h" | 1 #include "SkBlitRow_opts_SSE4.h" |
2 | 2 |
3 // Some compilers can't compile SSSE3 or SSE4 intrinsics. We give them stub met
hods. | 3 // Some compilers can't compile SSSE3 or SSE4 intrinsics. We give them stub met
hods. |
4 // The stubs should never be called, so we make them crash just to confirm that. | 4 // The stubs should never be called, so we make them crash just to confirm that. |
5 #if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSE41 | 5 #if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSE41 |
6 void S32A_Opaque_BlitRow32_SSE4(SkPMColor* SK_RESTRICT, const SkPMColor* SK_REST
RICT, int, U8CPU) { | 6 void S32A_Opaque_BlitRow32_SSE4(SkPMColor* SK_RESTRICT, const SkPMColor* SK_REST
RICT, int, U8CPU) { |
7 sk_throw(); | 7 sk_throw(); |
8 } | 8 } |
9 | 9 |
10 #else | 10 #else |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
48 continue; | 48 continue; |
49 } | 49 } |
50 // The general slow case: do the blend for all 16 pixels. | 50 // The general slow case: do the blend for all 16 pixels. |
51 _mm_storeu_si128(dst4+i+0, SkPMSrcOver_SSE2(s0, _mm_loadu_si128(dst4+i+0
))); | 51 _mm_storeu_si128(dst4+i+0, SkPMSrcOver_SSE2(s0, _mm_loadu_si128(dst4+i+0
))); |
52 _mm_storeu_si128(dst4+i+1, SkPMSrcOver_SSE2(s1, _mm_loadu_si128(dst4+i+1
))); | 52 _mm_storeu_si128(dst4+i+1, SkPMSrcOver_SSE2(s1, _mm_loadu_si128(dst4+i+1
))); |
53 _mm_storeu_si128(dst4+i+2, SkPMSrcOver_SSE2(s2, _mm_loadu_si128(dst4+i+2
))); | 53 _mm_storeu_si128(dst4+i+2, SkPMSrcOver_SSE2(s2, _mm_loadu_si128(dst4+i+2
))); |
54 _mm_storeu_si128(dst4+i+3, SkPMSrcOver_SSE2(s3, _mm_loadu_si128(dst4+i+3
))); | 54 _mm_storeu_si128(dst4+i+3, SkPMSrcOver_SSE2(s3, _mm_loadu_si128(dst4+i+3
))); |
55 } | 55 } |
56 | 56 |
57 // Wrap up the last <= 15 pixels. | 57 // Wrap up the last <= 15 pixels. |
| 58 SkASSERT(count - (count16*16) <= 15); |
58 for (int i = count16*16; i < count; i++) { | 59 for (int i = count16*16; i < count; i++) { |
59 // This check is not really necessary, but it prevents pointless autov
ectorization. | 60 // This check is not really necessary, but it prevents pointless autov
ectorization. |
60 if (src[i] & 0xFF000000) { | 61 if (src[i] & 0xFF000000) { |
61 dst[i] = SkPMSrcOver(src[i], dst[i]); | 62 dst[i] = SkPMSrcOver(src[i], dst[i]); |
62 } | 63 } |
63 } | 64 } |
64 } | 65 } |
65 | 66 |
66 #endif | 67 #endif |
OLD | NEW |