src/opts/SkPMFloat_sse.h - Issue 1308903003: Templatize SkPMFloat to support both 1 and 255 biases.

Side by Side Diff: src/opts/SkPMFloat_sse.h

Issue 1308903003: Templatize SkPMFloat to support both 1 and 255 biases. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: pump the loops for Android Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2015 Google Inc.	2 * Copyright 2015 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 namespace { // See SkPMFloat.h	8 namespace { // See SkPMFloat.h

9	9

10 inline SkPMFloat::SkPMFloat(SkPMColor c) {	10 template <int kBias>

	11 inline SkPMFloat<kBias>::SkPMFloat(SkPMColor c) {

11 SkPMColorAssert(c);	12 SkPMColorAssert(c);

12 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3	13 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3

13 const char _ = ~0; // Zero these bytes.	14 const char _ = ~0; // Zero these bytes.

14 __m128i fix8 = _mm_cvtsi32_si128((int)c),	15 __m128i fix8 = _mm_cvtsi32_si128((int)c),

15 fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_));	16 fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_));

16 #else	17 #else

17 __m128i fix8 = _mm_cvtsi32_si128((int)c),	18 __m128i fix8 = _mm_cvtsi32_si128((int)c),

18 fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()),	19 fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()),

19 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());	20 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());

20 #endif	21 #endif

21 fVec = _mm_mul_ps(_mm_cvtepi32_ps(fix8_32), _mm_set1_ps(1.0f / 255));	22 fVec = _mm_cvtepi32_ps(fix8_32);

	23 if (kBias == 1) {

	24 fVec = _mm_mul_ps(fVec, _mm_set1_ps(1.0f / 255));

	25 }

22 SkASSERT(this->isValid());	26 SkASSERT(this->isValid());

23 }	27 }

24	28

25 inline SkPMColor SkPMFloat::round() const {	29 template <int kBias>

	30 inline SkPMColor SkPMFloat<kBias>::round() const {

	31 __m128 scaled = fVec;

	32 if (kBias == 1) {

	33 scaled = _mm_mul_ps(scaled, _mm_set1_ps(255));

	34 }

26 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).	35 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).

27 __m128 scaled = _mm_mul_ps(_mm_set1_ps(255), fVec);

28 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), scaled)),	36 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), scaled)),

29 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),	37 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),

30 fix8 = _mm_packus_epi16(fix8_16, fix8_16);	38 fix8 = _mm_packus_epi16(fix8_16, fix8_16);

31 SkPMColor c = _mm_cvtsi128_si32(fix8);	39 SkPMColor c = _mm_cvtsi128_si32(fix8);

32 SkPMColorAssert(c);	40 SkPMColorAssert(c);

33 return c;	41 return c;

34 }	42 }

35	43

36 inline Sk4f SkPMFloat::alphas() const {	44 template <int kBias>

	45 inline Sk4f SkPMFloat<kBias>::alphas() const {

37 static_assert(SK_A32_SHIFT == 24, "");	46 static_assert(SK_A32_SHIFT == 24, "");

38 return _mm_shuffle_ps(fVec, fVec, 0xff); // Read as 11 11 11 11, copying lane 3 to all lanes.	47 return _mm_shuffle_ps(fVec, fVec, 0xff); // Read as 11 11 11 11, copying lane 3 to all lanes.

39 }	48 }

40	49

41 inline SkPMFloat SkPMFloat::FromOpaqueColor(SkColor c) {	50 template <int kBias>

	51 inline SkPMFloat<kBias> SkPMFloat<kBias>::FromOpaqueColor(SkColor c) {

42 SkASSERT(SkColorGetA(c) == 0xFF);	52 SkASSERT(SkColorGetA(c) == 0xFF);

43 __m128i fix8 = _mm_cvtsi32_si128((int)c);	53 __m128i fix8 = _mm_cvtsi32_si128((int)c);

44 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3	54 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3

45 const char _ = ~0; // Zero these bytes.	55 const char _ = ~0; // Zero these bytes.

46 __m128i fix8_32 = _mm_shuffle_epi8(fix8,	56 __m128i fix8_32 = _mm_shuffle_epi8(fix8,

47 #if defined(SK_PMCOLOR_IS_BGRA)	57 #if defined(SK_PMCOLOR_IS_BGRA)

48 _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_)	58 _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_)

49 #else	59 #else

50 _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, 3,_,_,_)	60 _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, 3,_,_,_)

51 #endif	61 #endif

52 );	62 );

53 #else	63 #else

54 __m128i fix8_16 = _mm_unpacklo_epi8 (fix8 , _mm_setzero_si128()),	64 __m128i fix8_16 = _mm_unpacklo_epi8 (fix8 , _mm_setzero_si128()),

55 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());	65 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());

56 #if defined(SK_PMCOLOR_IS_RGBA)	66 #if defined(SK_PMCOLOR_IS_RGBA)

57 fix8_32 = _mm_shuffle_epi32(fix8_32, 0xC6); // C6 == 11 00 01 10, i.e. swap lanes 0 and 2.	67 fix8_32 = _mm_shuffle_epi32(fix8_32, 0xC6); // C6 == 11 00 01 10, i.e. swap lanes 0 and 2.

58 #endif	68 #endif

59 #endif	69 #endif

60 SkPMFloat pmf = Sk4f(_mm_mul_ps(_mm_cvtepi32_ps(fix8_32), _mm_set1_ps(1.0f/255)));	70 __m128 floats = _mm_cvtepi32_ps(fix8_32);

	71 if (kBias == 1) {

	72 floats = _mm_mul_ps(floats, _mm_set1_ps(1.0f/255));

	73 }

	74 SkPMFloat pmf = Sk4f(floats);

61 SkASSERT(pmf.isValid());	75 SkASSERT(pmf.isValid());

62 return pmf;	76 return pmf;

63 }	77 }

64	78

65 } // namespace	79 } // namespace

OLD	NEW

« src/effects/SkColorMatrixFilter.cpp ('K') | « src/opts/SkPMFloat_none.h ('k') | src/opts/SkXfermode_opts.h » ('j') | no next file with comments »