OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2014 The Android Open Source Project | 2 * Copyright 2014 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkColor_opts_SSE2_DEFINED | 8 #ifndef SkColor_opts_SSE2_DEFINED |
9 #define SkColor_opts_SSE2_DEFINED | 9 #define SkColor_opts_SSE2_DEFINED |
10 | 10 |
11 #include <emmintrin.h> | 11 #include <emmintrin.h> |
12 | 12 |
// Diff row: the OLD column shows the former SkMul16ShiftRound_SSE signature, the NEW column
// introduces SkAlphaMulAlpha_SSE2(a, b). The rows that follow are the NEW body.
//
// SkAlphaMulAlpha_SSE2 multiplies a and b with _mm_mullo_epi16 (low 16 bits of each 16-bit
// lane product) and then, treating the result as 32-bit lanes, returns
//     (p + 128 + ((p + 128) >> 8)) >> 8
// per lane, i.e. the shift-based form of a rounded p / 255.
// Presumably every 32-bit lane of a and b holds a value in 0..255, so the upper 16-bit half
// of each lane stays zero and p fits in 16 bits -- TODO confirm against callers.
13 static inline __m128i SkMul16ShiftRound_SSE(__m128i a, __m128i b, int shift) { | 13 static inline __m128i SkAlphaMulAlpha_SSE2(__m128i a, __m128i b) { |
mtklein
2014/03/27 14:55:17
Can you add on // See SkAlphaMulAlpha / SkMulDiv25
qiankun
2014/03/28 07:52:57
Done.
| |
14 __m128i prod = _mm_mullo_epi16(a, b); | |
// Rounding bias of 128 added to every 32-bit lane.
15 prod = _mm_add_epi32(prod, _mm_set1_epi32(128)); | |
// Adding prod >> 8 before the final >> 8 turns the divide-by-256 into a divide-by-255.
16 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); | |
17 prod = _mm_srli_epi32(prod, 8); | |
18 | |
19 return prod; | |
20 } | |
21 | |
// SkGetPackedA32_SSE2: for each 32-bit lane of src, shifts left by (24 - SK_A32_SHIFT) so the
// 8 bits starting at bit SK_A32_SHIFT reach the top byte, then logically shifts right by 24
// so that byte is returned zero-extended in the lane.
// Assumes SK_A32_SHIFT (defined outside this view) is in 0..24 -- TODO confirm.
22 static inline __m128i SkGetPackedA32_SSE2(__m128i src) { | |
23 __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT)); | |
24 return _mm_srli_epi32(a, 24); | |
25 } | |
26 | |
// SkGetPackedR32_SSE2: for each 32-bit lane of src, shifts left by (24 - SK_R32_SHIFT) so the
// 8 bits starting at bit SK_R32_SHIFT reach the top byte, then logically shifts right by 24
// so that byte is returned zero-extended in the lane.
// Assumes SK_R32_SHIFT (defined outside this view) is in 0..24 -- TODO confirm.
27 static inline __m128i SkGetPackedR32_SSE2(__m128i src) { | |
28 __m128i r = _mm_slli_epi32(src, (24 - SK_R32_SHIFT)); | |
29 return _mm_srli_epi32(r, 24); | |
30 } | |
31 | |
// SkGetPackedG32_SSE2: for each 32-bit lane of src, shifts left by (24 - SK_G32_SHIFT) so the
// 8 bits starting at bit SK_G32_SHIFT reach the top byte, then logically shifts right by 24
// so that byte is returned zero-extended in the lane.
// Assumes SK_G32_SHIFT (defined outside this view) is in 0..24 -- TODO confirm.
32 static inline __m128i SkGetPackedG32_SSE2(__m128i src) { | |
33 __m128i g = _mm_slli_epi32(src, (24 - SK_G32_SHIFT)); | |
34 return _mm_srli_epi32(g, 24); | |
35 } | |
36 | |
// SkGetPackedB32_SSE2: for each 32-bit lane of src, shifts left by (24 - SK_B32_SHIFT) so the
// 8 bits starting at bit SK_B32_SHIFT reach the top byte, then logically shifts right by 24
// so that byte is returned zero-extended in the lane.
// Assumes SK_B32_SHIFT (defined outside this view) is in 0..24 -- TODO confirm.
37 static inline __m128i SkGetPackedB32_SSE2(__m128i src) { | |
38 __m128i b = _mm_slli_epi32(src, (24 - SK_B32_SHIFT)); | |
39 return _mm_srli_epi32(b, 24); | |
40 } | |
41 | |
// SkMul16ShiftRound_SSE2: per 16-bit lane, takes p = low 16 bits of a * b and returns
//     (p + r + ((p + r) >> shift)) >> shift,   with r = 1 << (shift - 1).
// shift - 1 is used as a shift count, so shift is expected to be >= 1.
// Every addition here is a wrapping 16-bit add; presumably callers keep the operands small
// enough that no lane overflows -- TODO confirm.
42 static inline __m128i SkMul16ShiftRound_SSE2(__m128i a, | |
43 __m128i b, int shift) { | |
14 __m128i prod = _mm_mullo_epi16(a, b); | 44 __m128i prod = _mm_mullo_epi16(a, b); |
// Rounding bias: half of 2^shift.
15 prod = _mm_add_epi16(prod, _mm_set1_epi16(1 << (shift - 1))); | 45 prod = _mm_add_epi16(prod, _mm_set1_epi16(1 << (shift - 1))); |
16 prod = _mm_add_epi16(prod, _mm_srli_epi16(prod, shift)); | 46 prod = _mm_add_epi16(prod, _mm_srli_epi16(prod, shift)); |
17 prod = _mm_srli_epi16(prod, shift); | 47 prod = _mm_srli_epi16(prod, shift); |
18 | 48 |
19 return prod; | 49 return prod; |
20 } | 50 } |
21 | 51 |
// SkPackRGB16_SSE2: shifts every 16-bit lane of r, g and b left by SK_R16_SHIFT, SK_G16_SHIFT
// and SK_B16_SHIFT respectively, then ORs the three results together lane by lane.
// No masking is done here; presumably each input lane already fits its channel width so the
// shifted fields do not overlap -- TODO confirm against callers.
22 static inline __m128i SkPackRGB16_SSE(__m128i r, __m128i g, __m128i b) { | 52 static inline __m128i SkPackRGB16_SSE2(__m128i r, __m128i g, __m128i b) { |
23 r = _mm_slli_epi16(r, SK_R16_SHIFT); | 53 r = _mm_slli_epi16(r, SK_R16_SHIFT); |
24 g = _mm_slli_epi16(g, SK_G16_SHIFT); | 54 g = _mm_slli_epi16(g, SK_G16_SHIFT); |
25 b = _mm_slli_epi16(b, SK_B16_SHIFT); | 55 b = _mm_slli_epi16(b, SK_B16_SHIFT); |
26 | 56 |
27 __m128i c = _mm_or_si128(r, g); | 57 __m128i c = _mm_or_si128(r, g); |
28 return _mm_or_si128(c, b); | 58 return _mm_or_si128(c, b); |
29 } | 59 } |
30 | 60 |
// Diff row: the OLD column ends the header guard here; the NEW column starts
// SkPackARGB32_SSE2 on the same row.
//
// SkPackARGB32_SSE2: shifts every 32-bit lane of a, r, g and b left by SK_A32_SHIFT,
// SK_R32_SHIFT, SK_G32_SHIFT and SK_B32_SHIFT respectively, then ORs the four results
// together lane by lane.
// No masking is done here; presumably each input lane holds a value in 0..255 so the shifted
// fields do not overlap -- TODO confirm against callers.
31 #endif//SkColor_opts_SSE2_DEFINED | 61 static inline __m128i SkPackARGB32_SSE2(__m128i a, __m128i r, |
62 __m128i g, __m128i b) { | |
63 a = _mm_slli_epi32(a, SK_A32_SHIFT); | |
64 r = _mm_slli_epi32(r, SK_R32_SHIFT); | |
65 g = _mm_slli_epi32(g, SK_G32_SHIFT); | |
66 b = _mm_slli_epi32(b, SK_B32_SHIFT); | |
67 | |
68 __m128i c = _mm_or_si128(a, r); | |
69 c = _mm_or_si128(c, g); | |
70 return _mm_or_si128(c, b); | |
71 } | |
72 | |
// SkPacked16ToR32_SSE2: for each 32-bit lane of src, isolates the field at SK_R16_SHIFT
// (shift right, then AND with SK_R16_MASK) and widens it to 8 bits.
// Assumes SK_R16_BITS is between 4 and 8 so both shift counts are non-negative -- TODO confirm.
73 static inline __m128i SkPacked16ToR32_SSE2(__m128i src) { | |
74 __m128i r = _mm_srli_epi32(src, SK_R16_SHIFT); | |
75 r = _mm_and_si128(r, _mm_set1_epi32(SK_R16_MASK)); | |
// Widen by bit replication: the field moves to the top of the byte and its own high bits
// are copied into the vacated low bits.
76 r = _mm_or_si128(_mm_slli_epi32(r, (8 - SK_R16_BITS)), | |
77 _mm_srli_epi32(r, (2 * SK_R16_BITS - 8))); | |
78 | |
79 return r; | |
80 } | |
81 | |
// SkPacked16ToG32_SSE2: for each 32-bit lane of src, isolates the field at SK_G16_SHIFT
// (shift right, then AND with SK_G16_MASK) and widens it to 8 bits.
// Assumes SK_G16_BITS is between 4 and 8 so both shift counts are non-negative -- TODO confirm.
82 static inline __m128i SkPacked16ToG32_SSE2(__m128i src) { | |
83 __m128i g = _mm_srli_epi32(src, SK_G16_SHIFT); | |
84 g = _mm_and_si128(g, _mm_set1_epi32(SK_G16_MASK)); | |
// Widen by bit replication: the field moves to the top of the byte and its own high bits
// are copied into the vacated low bits.
85 g = _mm_or_si128(_mm_slli_epi32(g, (8 - SK_G16_BITS)), | |
86 _mm_srli_epi32(g, (2 * SK_G16_BITS - 8))); | |
87 | |
88 return g; | |
89 } | |
90 | |
// SkPacked16ToB32_SSE2: for each 32-bit lane of src, isolates the field at SK_B16_SHIFT
// (shift right, then AND with SK_B16_MASK) and widens it to 8 bits.
// Assumes SK_B16_BITS is between 4 and 8 so both shift counts are non-negative -- TODO confirm.
91 static inline __m128i SkPacked16ToB32_SSE2(__m128i src) { | |
92 __m128i b = _mm_srli_epi32(src, SK_B16_SHIFT); | |
93 b = _mm_and_si128(b, _mm_set1_epi32(SK_B16_MASK)); | |
// Widen by bit replication: the field moves to the top of the byte and its own high bits
// are copied into the vacated low bits.
94 b = _mm_or_si128(_mm_slli_epi32(b, (8 - SK_B16_BITS)), | |
95 _mm_srli_epi32(b, (2 * SK_B16_BITS - 8))); | |
96 | |
97 return b; | |
98 } | |
99 | |
// SkPixel16ToPixel32_SSE2: expands the r, g and b fields of each 32-bit lane of src with the
// SkPacked16To{R,G,B}32_SSE2 helpers and repacks them via SkPackARGB32_SSE2 with the alpha
// argument set to 0xFF in every lane.
// Presumably each 32-bit lane of src carries one 16-bit pixel in its low half -- TODO confirm.
100 static inline __m128i SkPixel16ToPixel32_SSE2(__m128i src) { | |
101 __m128i r = SkPacked16ToR32_SSE2(src); | |
102 __m128i g = SkPacked16ToG32_SSE2(src); | |
103 __m128i b = SkPacked16ToB32_SSE2(src); | |
104 | |
105 return SkPackARGB32_SSE2(_mm_set1_epi32(0xFF), r, g, b); | |
106 } | |
107 | |
// SkPixel32ToPixel16_ToU16_SSE2: converts the 32-bit lanes of src_pixel1 and src_pixel2 into
// eight 16-bit values and returns them in one register.
// For each channel X the lane is shifted right by SK_X32_SHIFT + (8 - SK_X16_BITS), which
// drops the channel's low bits, and is then ANDed with SK_X16_MASK. _mm_packs_epi32 narrows
// the two inputs into 16-bit lanes: src_pixel1 fills the low four lanes, src_pixel2 the high
// four. The pack saturates as signed 16-bit, which is harmless while the masked values stay
// below 0x8000.
108 static inline __m128i SkPixel32ToPixel16_ToU16_SSE2(__m128i src_pixel1, __m128i src_pixel2) { | |
109 // Calculate result r. | |
110 __m128i r1 = _mm_srli_epi32(src_pixel1, | |
111 SK_R32_SHIFT + (8 - SK_R16_BITS)); | |
112 r1 = _mm_and_si128(r1, _mm_set1_epi32(SK_R16_MASK)); | |
113 __m128i r2 = _mm_srli_epi32(src_pixel2, | |
114 SK_R32_SHIFT + (8 - SK_R16_BITS)); | |
115 r2 = _mm_and_si128(r2, _mm_set1_epi32(SK_R16_MASK)); | |
116 __m128i r = _mm_packs_epi32(r1, r2); | |
117 | |
118 // Calculate result g. | |
119 __m128i g1 = _mm_srli_epi32(src_pixel1, | |
120 SK_G32_SHIFT + (8 - SK_G16_BITS)); | |
121 g1 = _mm_and_si128(g1, _mm_set1_epi32(SK_G16_MASK)); | |
122 __m128i g2 = _mm_srli_epi32(src_pixel2, | |
123 SK_G32_SHIFT + (8 - SK_G16_BITS)); | |
124 g2 = _mm_and_si128(g2, _mm_set1_epi32(SK_G16_MASK)); | |
125 __m128i g = _mm_packs_epi32(g1, g2); | |
126 | |
127 // Calculate result b. | |
128 __m128i b1 = _mm_srli_epi32(src_pixel1, | |
129 SK_B32_SHIFT + (8 - SK_B16_BITS)); | |
130 b1 = _mm_and_si128(b1, _mm_set1_epi32(SK_B16_MASK)); | |
131 __m128i b2 = _mm_srli_epi32(src_pixel2, | |
132 SK_B32_SHIFT + (8 - SK_B16_BITS)); | |
133 b2 = _mm_and_si128(b2, _mm_set1_epi32(SK_B16_MASK)); | |
134 __m128i b = _mm_packs_epi32(b1, b2); | |
135 | |
136 // Combine the three channels into 8 16-bit colors; the result is returned, not stored. | |
137 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b); | |
138 | |
139 return d_pixel; | |
140 } | |
141 | |
142 #endif // SkColor_opts_SSE2_DEFINED | |
OLD | NEW |