src/opts/SkPMFloat_SSE2.h - Issue 1048593002: Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: src/opts/SkPMFloat_SSE2.h

Issue 1048593002: Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T> (Closed). Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: "This is actually faster" (created 5 years, 9 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/opts/SkPMFloat_SSE2.h

diff --git a/src/opts/SkPMFloat_SSE2.h b/src/opts/SkPMFloat_SSE2.h

index fa920d75d6c0dc09e810895811dff7b1647d5117..231940d86eaa6509b08f4b985fc87b3fc4b30842 100644

--- a/src/opts/SkPMFloat_SSE2.h

+++ b/src/opts/SkPMFloat_SSE2.h

@@ -5,11 +5,6 @@

* found in the LICENSE file.

-inline SkPMFloat& SkPMFloat::operator=(const SkPMFloat& that) {

- fColors = that.fColors;

- return *this;

// For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 16 bits

// (fix8_16), then widen those to 8-bit-in-32-bits (fix8_32), and finally convert those to floats.

@@ -33,7 +28,7 @@ inline SkPMColor SkPMFloat::get() const {

inline SkPMColor SkPMFloat::clamped() const {

// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).

- __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors)),

+ __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors.vec())),

fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),

fix8 = _mm_packus_epi16(fix8_16, fix8_16);

SkPMColor c = _mm_cvtsi128_si32(fix8);

@@ -43,7 +38,7 @@ inline SkPMColor SkPMFloat::clamped() const {

inline SkPMColor SkPMFloat::trunc() const {

// Basically, same as clamped(), but no rounding.

- __m128i fix8_32 = _mm_cvttps_epi32(fColors),

+ __m128i fix8_32 = _mm_cvttps_epi32(fColors.vec()),

fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),

fix8 = _mm_packus_epi16(fix8_16, fix8_16);

SkPMColor c = _mm_cvtsi128_si32(fix8);

@@ -72,10 +67,10 @@ inline void SkPMFloat::ClampTo4PMColors(

SkPMColor colors[4]) {

// Same as _SSSE3.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.

// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).

- __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors)),

- c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors)),

- c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors)),

- c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors));

+ __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors.vec())),

+ c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors.vec())),

+ c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors.vec())),

+ c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors.vec()));

__m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),

_mm_packus_epi16(c2, c3));

_mm_storeu_si128((__m128i*)colors, c3210);

« no previous file with comments | « src/opts/SkNx_sse.h ('k') | src/opts/SkPMFloat_SSSE3.h » ('j') | no next file with comments »