src/opts/SkPMFloat_SSSE3.h - Issue 1055123002: New names for SkPMFloat methods.

Side by Side Diff: src/opts/SkPMFloat_SSSE3.h

Issue 1055123002: New names for SkPMFloat methods. (Closed) Base URL: https://skia.googlesource.com/skia@master

Patch Set: one more Created 5 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « src/opts/SkPMFloat_SSE2.h ('k') | src/opts/SkPMFloat_neon.h » ('j') | no next file with comments »

OLD	NEW
1 /*	1 /*

2 * Copyright 2015 Google Inc.	2 * Copyright 2015 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 32 bits	8 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 32 bits

9 // (fix8_32), then convert those to floats.	9 // (fix8_32), then convert those to floats.

10	10

11 // get() does the opposite, working from floats to 8-bit-in-32-bits, then back to packed 8 bit.	11 // round() does the opposite, working from floats to 8-bit-in-32-bits, then back to packed 8 bit.

12	12

13 // clamped() is the same as _SSE2: floats to 8-in-32, to 8-in-16, to packed 8 bit, with	13 // roundClamp() is the same as _SSE2: floats to 8-in-32, to 8-in-16, to packed 8 bit, with

14 // _mm_packus_epi16() both clamping and narrowing.	14 // _mm_packus_epi16() both clamping and narrowing.

15	15

16 inline SkPMFloat::SkPMFloat(SkPMColor c) {	16 inline SkPMFloat::SkPMFloat(SkPMColor c) {

17 SkPMColorAssert(c);	17 SkPMColorAssert(c);

18 const int _ = 255; // _ means to zero that byte.	18 const int _ = 255; // _ means to zero that byte.

19 __m128i fix8 = _mm_set_epi32(0,0,0,c),	19 __m128i fix8 = _mm_set_epi32(0,0,0,c),

20 fix8_32 = _mm_shuffle_epi8(fix8, _mm_set_epi8(_,_,_,3, _,_,_,2, _,_, _,1, _,_,_,0));	20 fix8_32 = _mm_shuffle_epi8(fix8, _mm_set_epi8(_,_,_,3, _,_,_,2, _,_, _,1, _,_,_,0));

21 fColors = _mm_cvtepi32_ps(fix8_32);	21 fColors = _mm_cvtepi32_ps(fix8_32);

22 SkASSERT(this->isValid());	22 SkASSERT(this->isValid());

23 }	23 }

24	24

25 inline SkPMColor SkPMFloat::trunc() const {	25 inline SkPMColor SkPMFloat::trunc() const {

26 const int _ = 255; // _ means to zero that byte.	26 const int _ = 255; // _ means to zero that byte.

27 __m128i fix8_32 = _mm_cvttps_epi32(fColors.vec()),	27 __m128i fix8_32 = _mm_cvttps_epi32(fColors.vec()),

28 fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _ ,_,_,_, 12,8,4,0));	28 fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _ ,_,_,_, 12,8,4,0));

29 SkPMColor c = _mm_cvtsi128_si32(fix8);	29 SkPMColor c = _mm_cvtsi128_si32(fix8);

30 SkPMColorAssert(c);	30 SkPMColorAssert(c);

31 return c;	31 return c;

32 }	32 }

33	33

34 inline SkPMColor SkPMFloat::get() const {	34 inline SkPMColor SkPMFloat::round() const {

35 return SkPMFloat(Sk4f(0.5f) + *this).trunc();	35 return SkPMFloat(Sk4f(0.5f) + *this).trunc();

36 }	36 }

37	37

38 inline SkPMColor SkPMFloat::clamped() const {	38 inline SkPMColor SkPMFloat::roundClamp() const {

39 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).	39 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).

40 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors.vec ())),	40 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors.vec ())),

41 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),	41 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),

42 fix8 = _mm_packus_epi16(fix8_16, fix8_16);	42 fix8 = _mm_packus_epi16(fix8_16, fix8_16);

43 SkPMColor c = _mm_cvtsi128_si32(fix8);	43 SkPMColor c = _mm_cvtsi128_si32(fix8);

44 SkPMColorAssert(c);	44 SkPMColorAssert(c);

45 return c;	45 return c;

46 }	46 }

47	47

48 inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],	48 inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],

49 SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, S kPMFloat* d) {	49 SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, S kPMFloat* d) {

50 // Haven't beaten this yet.	50 // Haven't beaten this yet.

51 *a = FromPMColor(colors[0]);	51 *a = FromPMColor(colors[0]);

52 *b = FromPMColor(colors[1]);	52 *b = FromPMColor(colors[1]);

53 *c = FromPMColor(colors[2]);	53 *c = FromPMColor(colors[2]);

54 *d = FromPMColor(colors[3]);	54 *d = FromPMColor(colors[3]);

55 }	55 }

56	56

57 inline void SkPMFloat::To4PMColors(	57 inline void SkPMFloat::RoundTo4PMColors(

58 const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFlo at& d,	58 const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFlo at& d,

59 SkPMColor colors[4]) {	59 SkPMColor colors[4]) {

60 // Haven't beaten this yet. Still faster than ClampTo4PMColors?	60 // Haven't beaten this yet. Still faster than RoundClampTo4PMColors?

61 colors[0] = a.get();	61 colors[0] = a.round();

62 colors[1] = b.get();	62 colors[1] = b.round();

63 colors[2] = c.get();	63 colors[2] = c.round();

64 colors[3] = d.get();	64 colors[3] = d.round();

65 }	65 }

66	66

67 inline void SkPMFloat::ClampTo4PMColors(	67 inline void SkPMFloat::RoundClampTo4PMColors(

68 const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFlo at& d,	68 const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFlo at& d,

69 SkPMColor colors[4]) {	69 SkPMColor colors[4]) {

70 // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.	70 // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.

71 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).	71 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).

72 __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors.vec()) ),	72 __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors.vec()) ),

73 c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors.vec()) ),	73 c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors.vec()) ),

74 c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors.vec()) ),	74 c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors.vec()) ),

75 c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors.vec()) );	75 c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors.vec()) );

76 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),	76 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),

77 _mm_packus_epi16(c2, c3));	77 _mm_packus_epi16(c2, c3));

78 _mm_storeu_si128((__m128i*)colors, c3210);	78 _mm_storeu_si128((__m128i*)colors, c3210);

79 SkPMColorAssert(colors[0]);	79 SkPMColorAssert(colors[0]);

80 SkPMColorAssert(colors[1]);	80 SkPMColorAssert(colors[1]);

81 SkPMColorAssert(colors[2]);	81 SkPMColorAssert(colors[2]);

82 SkPMColorAssert(colors[3]);	82 SkPMColorAssert(colors[3]);

83 }	83 }

OLD	NEW