Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/opts/SkColor_opts_SSE2.h

Issue 724333003: Optimize SkAlphaMulQ_SSE2 (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: undef ASSERT_EQ at the end of file Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 The Android Open Source Project 2 * Copyright 2014 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkColor_opts_SSE2_DEFINED 8 #ifndef SkColor_opts_SSE2_DEFINED
9 #define SkColor_opts_SSE2_DEFINED 9 #define SkColor_opts_SSE2_DEFINED
10 10
11 #include <emmintrin.h> 11 #include <emmintrin.h>
12 12
13 #define ASSERT_EQ(a,b) SkASSERT(0xffff == _mm_movemask_epi8(_mm_cmpeq_epi8((a), (b))))
14
13 // Because no _mm_mul_epi32() in SSE2, we emulate it here. 15 // Because no _mm_mul_epi32() in SSE2, we emulate it here.
14 // Multiplies 4 32-bit integers from a by 4 32-bit integers from b. 16 // Multiplies 4 32-bit integers from a by 4 32-bit integers from b.
15 // The 4 multiplication results should be represented within 32-bit 17 // The 4 multiplication results should be represented within 32-bit
16 // integers, otherwise they would overflow. 18 // integers, otherwise they would overflow.
17 static inline __m128i Multiply32_SSE2(const __m128i& a, const __m128i& b) { 19 static inline __m128i Multiply32_SSE2(const __m128i& a, const __m128i& b) {
18 // Calculate results of a0 * b0 and a2 * b2. 20 // Calculate results of a0 * b0 and a2 * b2.
19 __m128i r1 = _mm_mul_epu32(a, b); 21 __m128i r1 = _mm_mul_epu32(a, b);
20 // Calculate results of a1 * b1 and a3 * b3. 22 // Calculate results of a1 * b1 and a3 * b3.
21 __m128i r2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); 23 __m128i r2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4));
22 // Shuffle results to [63..0] and interleave the results. 24 // Shuffle results to [63..0] and interleave the results.
(...skipping 22 matching lines...) Expand all
45 __m128i mask = _mm_set1_epi32(0xFF00FF); 47 __m128i mask = _mm_set1_epi32(0xFF00FF);
46 __m128i s = _mm_or_si128(_mm_slli_epi32(scale, 16), scale); 48 __m128i s = _mm_or_si128(_mm_slli_epi32(scale, 16), scale);
47 49
48 // uint32_t rb = ((c & mask) * scale) >> 8 50 // uint32_t rb = ((c & mask) * scale) >> 8
49 __m128i rb = _mm_and_si128(mask, c); 51 __m128i rb = _mm_and_si128(mask, c);
50 rb = _mm_mullo_epi16(rb, s); 52 rb = _mm_mullo_epi16(rb, s);
51 rb = _mm_srli_epi16(rb, 8); 53 rb = _mm_srli_epi16(rb, 8);
52 54
53 // uint32_t ag = ((c >> 8) & mask) * scale 55 // uint32_t ag = ((c >> 8) & mask) * scale
54 __m128i ag = _mm_srli_epi16(c, 8); 56 __m128i ag = _mm_srli_epi16(c, 8);
55 ag = _mm_and_si128(ag, mask); 57 ASSERT_EQ(ag, _mm_and_si128(mask, ag)); // ag = _mm_srli_epi16(c, 8) did this for us.
56 ag = _mm_mullo_epi16(ag, s); 58 ag = _mm_mullo_epi16(ag, s);
57 59
58 // (rb & mask) | (ag & ~mask) 60 // (rb & mask) | (ag & ~mask)
59 rb = _mm_and_si128(mask, rb); 61 ASSERT_EQ(rb, _mm_and_si128(mask, rb)); // rb = _mm_srli_epi16(rb, 8) did this for us.
60 ag = _mm_andnot_si128(mask, ag); 62 ag = _mm_andnot_si128(mask, ag);
61 return _mm_or_si128(rb, ag); 63 return _mm_or_si128(rb, ag);
62 } 64 }
63 65
64 static inline __m128i SkGetPackedA32_SSE2(const __m128i& src) { 66 static inline __m128i SkGetPackedA32_SSE2(const __m128i& src) {
65 __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT)); 67 __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT));
66 return _mm_srli_epi32(a, 24); 68 return _mm_srli_epi32(a, 24);
67 } 69 }
68 70
69 static inline __m128i SkGetPackedR32_SSE2(const __m128i& src) { 71 static inline __m128i SkGetPackedR32_SSE2(const __m128i& src) {
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
176 SK_B32_SHIFT + (8 - SK_B16_BITS)); 178 SK_B32_SHIFT + (8 - SK_B16_BITS));
177 b2 = _mm_and_si128(b2, _mm_set1_epi32(SK_B16_MASK)); 179 b2 = _mm_and_si128(b2, _mm_set1_epi32(SK_B16_MASK));
178 __m128i b = _mm_packs_epi32(b1, b2); 180 __m128i b = _mm_packs_epi32(b1, b2);
179 181
180 // Store 8 16-bit colors in dst. 182 // Store 8 16-bit colors in dst.
181 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b); 183 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b);
182 184
183 return d_pixel; 185 return d_pixel;
184 } 186 }
185 187
188 #undef ASSERT_EQ
186 #endif // SkColor_opts_SSE2_DEFINED 189 #endif // SkColor_opts_SSE2_DEFINED
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698