src/opts/SkPMFloat_SSSE3.h - Issue 1048593002: Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>

Side by Side Diff: src/opts/SkPMFloat_SSSE3.h

Issue 1048593002: Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T> (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: This is actually faster Created 5 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2015 Google Inc.	2 * Copyright 2015 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 inline SkPMFloat& SkPMFloat::operator=(const SkPMFloat& that) {

9 fColors = that.fColors;

10 return *this;

11 }

12

13 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 32 bits	8 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 32 bits

14 // (fix8_32), then convert those to floats.	9 // (fix8_32), then convert those to floats.

15	10

16 // get() does the opposite, working from floats to 8-bit-in-32-bits, then back to packed 8 bit.	11 // get() does the opposite, working from floats to 8-bit-in-32-bits, then back to packed 8 bit.

17	12

18 // clamped() is the same as _SSE2: floats to 8-in-32, to 8-in-16, to packed 8 bit, with	13 // clamped() is the same as _SSE2: floats to 8-in-32, to 8-in-16, to packed 8 bit, with

19 // _mm_packus_epi16() both clamping and narrowing.	14 // _mm_packus_epi16() both clamping and narrowing.

20	15

21 inline SkPMFloat::SkPMFloat(SkPMColor c) {	16 inline SkPMFloat::SkPMFloat(SkPMColor c) {

22 SkPMColorAssert(c);	17 SkPMColorAssert(c);

23 const int _ = 255; // _ means to zero that byte.	18 const int _ = 255; // _ means to zero that byte.

24 __m128i fix8 = _mm_set_epi32(0,0,0,c),	19 __m128i fix8 = _mm_set_epi32(0,0,0,c),

25 fix8_32 = _mm_shuffle_epi8(fix8, _mm_set_epi8(_,_,_,3, _,_,_,2, _,_,_,1, _,_,_,0));	20 fix8_32 = _mm_shuffle_epi8(fix8, _mm_set_epi8(_,_,_,3, _,_,_,2, _,_,_,1, _,_,_,0));

26 fColors = _mm_cvtepi32_ps(fix8_32);	21 fColors = _mm_cvtepi32_ps(fix8_32);

27 SkASSERT(this->isValid());	22 SkASSERT(this->isValid());

28 }	23 }

29	24

30 inline SkPMColor SkPMFloat::trunc() const {	25 inline SkPMColor SkPMFloat::trunc() const {

31 const int _ = 255; // _ means to zero that byte.	26 const int _ = 255; // _ means to zero that byte.

32 __m128i fix8_32 = _mm_cvttps_epi32(fColors),	27 __m128i fix8_32 = _mm_cvttps_epi32(fColors.vec()),

33 fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _,_,_,_, 12,8,4,0));	28 fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _,_,_,_, 12,8,4,0));

34 SkPMColor c = _mm_cvtsi128_si32(fix8);	29 SkPMColor c = _mm_cvtsi128_si32(fix8);

35 SkPMColorAssert(c);	30 SkPMColorAssert(c);

36 return c;	31 return c;

37 }	32 }

38	33

39 inline SkPMColor SkPMFloat::get() const {	34 inline SkPMColor SkPMFloat::get() const {

40 SkASSERT(this->isValid());	35 SkASSERT(this->isValid());

41 return SkPMFloat(Sk4f(0.5f) + *this).trunc();	36 return SkPMFloat(Sk4s(0.5f) + *this).trunc();

42 }	37 }

43	38

44 inline SkPMColor SkPMFloat::clamped() const {	39 inline SkPMColor SkPMFloat::clamped() const {

45 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).	40 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).

46 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors)),	41 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors.vec())),

47 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),	42 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),

48 fix8 = _mm_packus_epi16(fix8_16, fix8_16);	43 fix8 = _mm_packus_epi16(fix8_16, fix8_16);

49 SkPMColor c = _mm_cvtsi128_si32(fix8);	44 SkPMColor c = _mm_cvtsi128_si32(fix8);

50 SkPMColorAssert(c);	45 SkPMColorAssert(c);

51 return c;	46 return c;

52 }	47 }

53	48

54 inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],	49 inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],

55 SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {	50 SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {

56 // Haven't beaten this yet.	51 // Haven't beaten this yet.

(...skipping 11 matching lines...) Expand all Loading...
68 colors[1] = b.get();	63 colors[1] = b.get();

69 colors[2] = c.get();	64 colors[2] = c.get();

70 colors[3] = d.get();	65 colors[3] = d.get();

71 }	66 }

72	67

73 inline void SkPMFloat::ClampTo4PMColors(	68 inline void SkPMFloat::ClampTo4PMColors(

74 const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,	69 const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,

75 SkPMColor colors[4]) {	70 SkPMColor colors[4]) {

76 // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.	71 // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.

77 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).	72 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).

78 __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors)),	73 __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors.vec())),

79 c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors)),	74 c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors.vec())),

80 c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors)),	75 c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors.vec())),

81 c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors));	76 c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors.vec()));

82 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),	77 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),

83 _mm_packus_epi16(c2, c3));	78 _mm_packus_epi16(c2, c3));

84 _mm_storeu_si128((__m128i*)colors, c3210);	79 _mm_storeu_si128((__m128i*)colors, c3210);

85 SkPMColorAssert(colors[0]);	80 SkPMColorAssert(colors[0]);

86 SkPMColorAssert(colors[1]);	81 SkPMColorAssert(colors[1]);

87 SkPMColorAssert(colors[2]);	82 SkPMColorAssert(colors[2]);

88 SkPMColorAssert(colors[3]);	83 SkPMColorAssert(colors[3]);

89 }	84 }

OLD	NEW

« no previous file with comments | « src/opts/SkPMFloat_SSE2.h ('k') | src/opts/SkPMFloat_neon.h » ('j') | no next file with comments »