Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/opts/SkPx_sse.h

Issue 1447273004: trim some fat from SSE2 fixed point alpha code (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« src/opts/Sk4px_SSE2.h ('K') | « src/opts/Sk4px_SSE2.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkPx_sse_DEFINED 8 #ifndef SkPx_sse_DEFINED
9 #define SkPx_sse_DEFINED 9 #define SkPx_sse_DEFINED
10 10
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
54 struct Alpha { 54 struct Alpha {
55 __m128i fVec; 55 __m128i fVec;
56 Alpha(__m128i vec) : fVec(vec) {} 56 Alpha(__m128i vec) : fVec(vec) {}
57 57
58 static Alpha Dup(uint8_t a) { return _mm_set1_epi8(a); } 58 static Alpha Dup(uint8_t a) { return _mm_set1_epi8(a); }
59 static Alpha Load(const uint8_t* a) { 59 static Alpha Load(const uint8_t* a) {
60 __m128i as = _mm_cvtsi32_si128(*(const uint32_t*)a); // ____ ____ ____ 3210 60 __m128i as = _mm_cvtsi32_si128(*(const uint32_t*)a); // ____ ____ ____ 3210
61 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 61 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
62 return _mm_shuffle_epi8(as, _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0)); 62 return _mm_shuffle_epi8(as, _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0));
63 #else 63 #else
64 as = _mm_unpacklo_epi8 (as, _mm_setzero_si128()); // ____ ____ _3_2 _1_0 64 as = _mm_unpacklo_epi8 (as, as); // ____ ____ 3322 1100
65 as = _mm_unpacklo_epi16(as, _mm_setzero_si128()); // ___3 ___2 ___1 ___0 65 as = _mm_unpacklo_epi16(as, as); // 3333 2222 1111 0000
66 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00 66 return as;
67 return _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000
68 #endif 67 #endif
69 } 68 }
70 static Alpha Load(const uint8_t* a, int n) { 69 static Alpha Load(const uint8_t* a, int n) {
71 SkASSERT(n > 0 && n < 4); 70 SkASSERT(n > 0 && n < 4);
72 uint8_t a4[] = { 0,0,0,0 }; 71 uint8_t a4[] = { 0,0,0,0 };
73 switch (n) { 72 switch (n) {
74 case 3: a4[2] = a[2]; // fall through 73 case 3: a4[2] = a[2]; // fall through
75 case 2: a4[1] = a[1]; // fall through 74 case 2: a4[1] = a[1]; // fall through
76 case 1: a4[0] = a[0]; 75 case 1: a4[0] = a[0];
77 } 76 }
(...skipping 23 matching lines...) Expand all
101 SkPx addNarrowHi(const SkPx& o) const { 100 SkPx addNarrowHi(const SkPx& o) const {
102 Wide sum = (*this + o.widenLo()).shr<8>(); 101 Wide sum = (*this + o.widenLo()).shr<8>();
103 return _mm_packus_epi16(sum.fLo, sum.fHi); 102 return _mm_packus_epi16(sum.fLo, sum.fHi);
104 } 103 }
105 }; 104 };
106 105
107 Alpha alpha() const { 106 Alpha alpha() const {
108 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 107 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
109 return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3)); 108 return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3));
110 #else 109 #else
111 __m128i as = _mm_srli_epi32(fVec, 24); // ___3 ___2 ___1 ___0 110 // We exploit that A >= rgb for any premul pixel.
112 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00 111 __m128i as = fVec; // 3xxx 2xxx 1xxx 0xxx
113 return _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000 112 as = _mm_max_epu8(as, _mm_srli_epi32(as, 8)); // 33xx 22xx 11xx 00xx
113 as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000
114 return as;
114 #endif 115 #endif
115 } 116 }
116 117
117 Wide widenLo() const { 118 Wide widenLo() const {
118 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()), 119 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()),
119 _mm_unpackhi_epi8(fVec, _mm_setzero_si128())); 120 _mm_unpackhi_epi8(fVec, _mm_setzero_si128()));
120 } 121 }
121 Wide widenHi() const { 122 Wide widenHi() const {
122 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec), 123 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec),
123 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec)); 124 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec));
(...skipping 21 matching lines...) Expand all
145 SkPx addAlpha(const Alpha& a) const { 146 SkPx addAlpha(const Alpha& a) const {
146 return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF000000))); 147 return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF000000)));
147 } 148 }
148 }; 149 };
149 150
150 } // namespace sse 151 } // namespace sse
151 152
152 typedef sse::SkPx SkPx; 153 typedef sse::SkPx SkPx;
153 154
154 #endif//SkPx_sse_DEFINED 155 #endif//SkPx_sse_DEFINED
OLDNEW
« src/opts/Sk4px_SSE2.h ('K') | « src/opts/Sk4px_SSE2.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698