Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(119)

Side by Side Diff: src/opts/Sk4x_sse.h

Issue 1029163002: Replace _mm_cvtps_epi32(x) with _mm_cvttps_epi32(_mm_add_ps(0.5f, x)). (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: tweak test Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/opts/SkPMFloat_SSE2.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // It is important _not_ to put header guards here. 1 // It is important _not_ to put header guards here.
2 // This file will be intentionally included three times. 2 // This file will be intentionally included three times.
3 3
4 // Useful reading: 4 // Useful reading:
5 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/ 5 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/
6 6
7 #include "SkTypes.h" // Keep this before any #ifdef for skbug.com/3362 7 #include "SkTypes.h" // Keep this before any #ifdef for skbug.com/3362
8 8
9 #if defined(SK4X_PREAMBLE) 9 #if defined(SK4X_PREAMBLE)
10 // Code in this file may assume SSE and SSE2. 10 // Code in this file may assume SSE and SSE2.
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
79 79
80 M() Sk4x(float v) : fVec(_mm_set1_ps(v)) {} 80 M() Sk4x(float v) : fVec(_mm_set1_ps(v)) {}
81 M() Sk4x(float a, float b, float c, float d) : fVec(_mm_set_ps(d,c,b,a)) {} 81 M() Sk4x(float a, float b, float c, float d) : fVec(_mm_set_ps(d,c,b,a)) {}
82 82
83 M(Sk4f) Load (const float fs[4]) { return _mm_loadu_ps(fs); } 83 M(Sk4f) Load (const float fs[4]) { return _mm_loadu_ps(fs); }
84 M(Sk4f) LoadAligned(const float fs[4]) { return _mm_load_ps (fs); } 84 M(Sk4f) LoadAligned(const float fs[4]) { return _mm_load_ps (fs); }
85 85
86 M(void) store (float fs[4]) const { _mm_storeu_ps(fs, fVec); } 86 M(void) store (float fs[4]) const { _mm_storeu_ps(fs, fVec); }
87 M(void) storeAligned(float fs[4]) const { _mm_store_ps (fs, fVec); } 87 M(void) storeAligned(float fs[4]) const { _mm_store_ps (fs, fVec); }
88 88
89 template <> 89 template <> M(Sk4i) reinterpret<Sk4i>() const { return as_4i(fVec); }
90 M(Sk4i) reinterpret<Sk4i>() const { return as_4i(fVec); }
91 90
92 template <> 91 // cvttps truncates, same as (int) when positive.
93 M(Sk4i) cast<Sk4i>() const { return _mm_cvtps_epi32(fVec); } 92 template <> M(Sk4i) cast<Sk4i>() const { return _mm_cvttps_epi32(fVec); }
94 93
95 // We're going to try a little experiment here and skip allTrue(), anyTrue(), and bit-manipulators 94 // We're going to try a little experiment here and skip allTrue(), anyTrue(), and bit-manipulators
96 // for Sk4f. Code that calls them probably does so accidentally. 95 // for Sk4f. Code that calls them probably does so accidentally.
97 // Ask mtklein to fill these in if you really need them. 96 // Ask mtklein to fill these in if you really need them.
98 97
99 M(Sk4f) add (const Sk4f& o) const { return _mm_add_ps(fVec, o.fVec); } 98 M(Sk4f) add (const Sk4f& o) const { return _mm_add_ps(fVec, o.fVec); }
100 M(Sk4f) subtract(const Sk4f& o) const { return _mm_sub_ps(fVec, o.fVec); } 99 M(Sk4f) subtract(const Sk4f& o) const { return _mm_sub_ps(fVec, o.fVec); }
101 M(Sk4f) multiply(const Sk4f& o) const { return _mm_mul_ps(fVec, o.fVec); } 100 M(Sk4f) multiply(const Sk4f& o) const { return _mm_mul_ps(fVec, o.fVec); }
102 M(Sk4f) divide (const Sk4f& o) const { return _mm_div_ps(fVec, o.fVec); } 101 M(Sk4f) divide (const Sk4f& o) const { return _mm_div_ps(fVec, o.fVec); }
103 102
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
175 } 174 }
176 M(Sk4i) Max(const Sk4i& a, const Sk4i& b) { 175 M(Sk4i) Max(const Sk4i& a, const Sk4i& b) {
177 Sk4i less = a.lessThan(b); 176 Sk4i less = a.lessThan(b);
178 return b.bitAnd(less).bitOr(a.andNot(less)); 177 return b.bitAnd(less).bitOr(a.andNot(less));
179 } 178 }
180 #endif 179 #endif
181 180
182 #undef M 181 #undef M
183 182
184 #endif//Method definitions. 183 #endif//Method definitions.
OLDNEW
« no previous file with comments | « no previous file | src/opts/SkPMFloat_SSE2.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698