src/opts/SkPMFloat_SSSE3.h - Issue 1035583002: Update 4-at-a-time APIs.

Side by Side Diff: src/opts/SkPMFloat_SSSE3.h

Issue 1035583002: Update 4-at-a-time APIs. (Closed) Base URL: https://skia.googlesource.com/skia@master

Patch Set: (C) Created 5 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	1 /*

	2 * Copyright 2015 Google Inc.

	3 *

	4 * Use of this source code is governed by a BSD-style license that can be

	5 * found in the LICENSE file.

	6 */

	7

1 inline SkPMFloat& SkPMFloat::operator=(const SkPMFloat& that) {	8 inline SkPMFloat& SkPMFloat::operator=(const SkPMFloat& that) {

2 fColors = that.fColors;	9 fColors = that.fColors;

3 return *this;	10 return *this;

4 }	11 }

5	12

6 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 32 bits	13 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 32 bits

7 // (fix8_32), then convert those to floats.	14 // (fix8_32), then convert those to floats.

8	15

9 // get() does the opposite, working from floats to 8-bit-in-32-bits, then back to packed 8 bit.	16 // get() does the opposite, working from floats to 8-bit-in-32-bits, then back to packed 8 bit.

10	17

(...skipping 23 matching lines...) Expand all Loading...
34 inline SkPMColor SkPMFloat::clamped() const {	41 inline SkPMColor SkPMFloat::clamped() const {

35 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).	42 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).

36 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors)),	43 __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors)),

37 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),	44 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),

38 fix8 = _mm_packus_epi16(fix8_16, fix8_16);	45 fix8 = _mm_packus_epi16(fix8_16, fix8_16);

39 SkPMColor c = _mm_cvtsi128_si32(fix8);	46 SkPMColor c = _mm_cvtsi128_si32(fix8);

40 SkPMColorAssert(c);	47 SkPMColorAssert(c);

41 return c;	48 return c;

42 }	49 }

43	50

44 inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors[4]) {	51 inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],

	52 SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {

45 // Haven't beaten this yet.	53 // Haven't beaten this yet.

46 for (int i = 0; i < 4; i++) { floats[i] = FromPMColor(colors[i]); }	54 *a = FromPMColor(colors[0]);

	55 *b = FromPMColor(colors[1]);

	56 *c = FromPMColor(colors[2]);

	57 *d = FromPMColor(colors[3]);

47 }	58 }

48	59

49 inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {	60 inline void SkPMFloat::To4PMColors(

50 // Haven't beaten this yet. Still faster than ClampTo4PMColors too.	61 const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,

51 for (int i = 0; i < 4; i++) { colors[i] = floats[i].get(); }	62 SkPMColor colors[4]) {

	63 // Haven't beaten this yet. Still faster than ClampTo4PMColors?

	64 colors[0] = a.get();

	65 colors[1] = b.get();

	66 colors[2] = c.get();

	67 colors[3] = d.get();

52 }	68 }

53	69

54 inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {	70 inline void SkPMFloat::ClampTo4PMColors(

	71 const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,

	72 SkPMColor colors[4]) {

55 // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.	73 // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.

56 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).	74 // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).

57 __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[0].fColors)),	75 __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors)),

58 c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[1].fColors)),	76 c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors)),

59 c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[2].fColors)),	77 c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors)),

60 c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[3].fColors));	78 c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors));

61 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),	79 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),

62 _mm_packus_epi16(c2, c3));	80 _mm_packus_epi16(c2, c3));

63 _mm_storeu_si128((__m128i*)colors, c3210);	81 _mm_storeu_si128((__m128i*)colors, c3210);

64 SkPMColorAssert(colors[0]);	82 SkPMColorAssert(colors[0]);

65 SkPMColorAssert(colors[1]);	83 SkPMColorAssert(colors[1]);

66 SkPMColorAssert(colors[2]);	84 SkPMColorAssert(colors[2]);

67 SkPMColorAssert(colors[3]);	85 SkPMColorAssert(colors[3]);

68 }	86 }

OLD	NEW

« no previous file with comments | « src/opts/SkPMFloat_SSE2.h ('k') | src/opts/SkPMFloat_neon.h » ('j') | no next file with comments »