src/opts/SkPMFloat_SSE2.h - Issue 1201343004: Convert SkPMFloat to [0,1] range and prune its API.

Side by Side Diff: src/opts/SkPMFloat_SSE2.h

Issue 1201343004: Convert SkPMFloat to [0,1] range and prune its API. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: tweaks Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /*

2 * Copyright 2015 Google Inc.

3 *

4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.

6 */

7

8 namespace { // See SkPMFloat.h

9

10 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 16 bits

11 // (fix8_16), then widen those to 8-bit-in-32-bits (fix8_32), and finally convert those to floats.

12

13 // round() and roundClamp() do the opposite, working from floats to 8-bit-in-32-bit,

14 // to 8-bit-in-16-bit, back down to 8-bit components.

15 // _mm_packus_epi16() gives us clamping for free while narrowing.

16

17 inline SkPMFloat::SkPMFloat(SkPMColor c) {

18 SkPMColorAssert(c);

19 __m128i fix8 = _mm_set_epi32(0,0,0,c),

20 fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()),

21 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());

22 fVec = _mm_cvtepi32_ps(fix8_32);

23 SkASSERT(this->isValid());

24 }

25

26 inline SkPMColor SkPMFloat::round() const {

27 return this->roundClamp(); // Haven't beaten this yet.

28 }

29

30 inline SkPMColor SkPMFloat::roundClamp() const {

31 // We don't use _mm_cvtps_epi32, because we want precise control over how 0. 5 rounds (up).

32 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fVec)),

33 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),

34 fix8 = _mm_packus_epi16(fix8_16, fix8_16);

35 SkPMColor c = _mm_cvtsi128_si32(fix8);

36 SkPMColorAssert(c);

37 return c;

38 }

39

40 inline SkPMColor SkPMFloat::trunc() const {

41 // Basically, same as roundClamp(), but no rounding.

42 __m128i fix8_32 = _mm_cvttps_epi32(fVec),

43 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),

44 fix8 = _mm_packus_epi16(fix8_16, fix8_16);

45 SkPMColor c = _mm_cvtsi128_si32(fix8);

46 SkPMColorAssert(c);

47 return c;

48 }

49

50 inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],

51 SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, S kPMFloat* d) {

52 // Haven't beaten this yet.

53 *a = FromPMColor(colors[0]);

54 *b = FromPMColor(colors[1]);

55 *c = FromPMColor(colors[2]);

56 *d = FromPMColor(colors[3]);

57 }

58

59 inline void SkPMFloat::RoundTo4PMColors(

60 const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFlo at& d,

61 SkPMColor colors[4]) {

62 // Haven't beaten this yet.

63 RoundClampTo4PMColors(a,b,c,d, colors);

64 }

65

66 inline void SkPMFloat::RoundClampTo4PMColors(

67 const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFlo at& d,

68 SkPMColor colors[4]) {

69 // Same as _SSSE3.h's. We use 3 _mm_packus_epi16() where the naive loop use s 8.

70 // We don't use _mm_cvtps_epi32, because we want precise control over how 0. 5 rounds (up).

71 __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fVec)),

72 c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fVec)),

73 c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fVec)),

74 c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fVec));

75 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),

76 _mm_packus_epi16(c2, c3));

77 _mm_storeu_si128((__m128i*)colors, c3210);

78 SkPMColorAssert(colors[0]);

79 SkPMColorAssert(colors[1]);

80 SkPMColorAssert(colors[2]);

81 SkPMColorAssert(colors[3]);

82 }

83

84 } // namespace

OLD	NEW

« no previous file with comments | « src/effects/SkColorMatrixFilter.cpp ('k') | src/opts/SkPMFloat_SSSE3.h » ('j') | no next file with comments »