Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/opts/SkPMFloat_sse.h

Issue 1308903003: Templatize SkPMFloat to support both 1 and 255 biases. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: pump the loops for Android Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 namespace { // See SkPMFloat.h 8 namespace { // See SkPMFloat.h
9 9
10 inline SkPMFloat::SkPMFloat(SkPMColor c) { 10 template <int kBias>
11 inline SkPMFloat<kBias>::SkPMFloat(SkPMColor c) {
11 SkPMColorAssert(c); 12 SkPMColorAssert(c);
12 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 13 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
13 const char _ = ~0; // Zero these bytes. 14 const char _ = ~0; // Zero these bytes.
14 __m128i fix8 = _mm_cvtsi32_si128((int)c), 15 __m128i fix8 = _mm_cvtsi32_si128((int)c),
15 fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_ ,_,_, 3,_,_,_)); 16 fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_ ,_,_, 3,_,_,_));
16 #else 17 #else
17 __m128i fix8 = _mm_cvtsi32_si128((int)c), 18 __m128i fix8 = _mm_cvtsi32_si128((int)c),
18 fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()), 19 fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()),
19 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); 20 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());
20 #endif 21 #endif
21 fVec = _mm_mul_ps(_mm_cvtepi32_ps(fix8_32), _mm_set1_ps(1.0f / 255)); 22 fVec = _mm_cvtepi32_ps(fix8_32);
23 if (kBias == 1) {
24 fVec = _mm_mul_ps(fVec, _mm_set1_ps(1.0f / 255));
25 }
22 SkASSERT(this->isValid()); 26 SkASSERT(this->isValid());
23 } 27 }
24 28
25 inline SkPMColor SkPMFloat::round() const { 29 template <int kBias>
30 inline SkPMColor SkPMFloat<kBias>::round() const {
31 __m128 scaled = fVec;
32 if (kBias == 1) {
33 scaled = _mm_mul_ps(scaled, _mm_set1_ps(255));
34 }
26 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). 35 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
27 __m128 scaled = _mm_mul_ps(_mm_set1_ps(255), fVec);
28 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), scaled)), 36 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), scaled)),
29 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), 37 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),
30 fix8 = _mm_packus_epi16(fix8_16, fix8_16); 38 fix8 = _mm_packus_epi16(fix8_16, fix8_16);
31 SkPMColor c = _mm_cvtsi128_si32(fix8); 39 SkPMColor c = _mm_cvtsi128_si32(fix8);
32 SkPMColorAssert(c); 40 SkPMColorAssert(c);
33 return c; 41 return c;
34 } 42 }
35 43
36 inline Sk4f SkPMFloat::alphas() const { 44 template <int kBias>
45 inline Sk4f SkPMFloat<kBias>::alphas() const {
37 static_assert(SK_A32_SHIFT == 24, ""); 46 static_assert(SK_A32_SHIFT == 24, "");
38 return _mm_shuffle_ps(fVec, fVec, 0xff); // Read as 11 11 11 11, copying lane 3 to all lanes. 47 return _mm_shuffle_ps(fVec, fVec, 0xff); // Read as 11 11 11 11, copying lane 3 to all lanes.
39 } 48 }
40 49
41 inline SkPMFloat SkPMFloat::FromOpaqueColor(SkColor c) { 50 template <int kBias>
51 inline SkPMFloat<kBias> SkPMFloat<kBias>::FromOpaqueColor(SkColor c) {
42 SkASSERT(SkColorGetA(c) == 0xFF); 52 SkASSERT(SkColorGetA(c) == 0xFF);
43 __m128i fix8 = _mm_cvtsi32_si128((int)c); 53 __m128i fix8 = _mm_cvtsi32_si128((int)c);
44 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 54 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
45 const char _ = ~0; // Zero these bytes. 55 const char _ = ~0; // Zero these bytes.
46 __m128i fix8_32 = _mm_shuffle_epi8(fix8, 56 __m128i fix8_32 = _mm_shuffle_epi8(fix8,
47 #if defined(SK_PMCOLOR_IS_BGRA) 57 #if defined(SK_PMCOLOR_IS_BGRA)
48 _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_) 58 _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_)
49 #else 59 #else
50 _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, 3,_,_,_) 60 _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, 3,_,_,_)
51 #endif 61 #endif
52 ); 62 );
53 #else 63 #else
54 __m128i fix8_16 = _mm_unpacklo_epi8 (fix8 , _mm_setzero_si128()), 64 __m128i fix8_16 = _mm_unpacklo_epi8 (fix8 , _mm_setzero_si128()),
55 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); 65 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());
56 #if defined(SK_PMCOLOR_IS_RGBA) 66 #if defined(SK_PMCOLOR_IS_RGBA)
57 fix8_32 = _mm_shuffle_epi32(fix8_32, 0xC6); // C6 == 11 00 01 10, i.e. swap lanes 0 and 2. 67 fix8_32 = _mm_shuffle_epi32(fix8_32, 0xC6); // C6 == 11 00 01 10, i.e. swap lanes 0 and 2.
58 #endif 68 #endif
59 #endif 69 #endif
60 SkPMFloat pmf = Sk4f(_mm_mul_ps(_mm_cvtepi32_ps(fix8_32), _mm_set1_ps(1.0f/255))); 70 __m128 floats = _mm_cvtepi32_ps(fix8_32);
71 if (kBias == 1) {
72 floats = _mm_mul_ps(floats, _mm_set1_ps(1.0f/255));
73 }
74 SkPMFloat pmf = Sk4f(floats);
61 SkASSERT(pmf.isValid()); 75 SkASSERT(pmf.isValid());
62 return pmf; 76 return pmf;
63 } 77 }
64 78
65 } // namespace 79 } // namespace
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698