Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 1048593002: Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T> (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: This is actually faster Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | src/opts/SkPMFloat_SSE2.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright 2015 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED
10
11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent.
12 #include <immintrin.h>
13
14 template <>
15 class SkNi<2, int32_t> {
16 public:
17 SkNi(const __m128i& vec) : fVec(vec) {}
18
19 SkNi() {}
20 bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); }
21 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); }
22
23 private:
24 __m128i fVec;
25 };
26
27 template <>
28 class SkNi<4, int32_t> {
29 public:
30 SkNi(const __m128i& vec) : fVec(vec) {}
31
32 SkNi() {}
33 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); }
34 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); }
35
36 private:
37 __m128i fVec;
38 };
39
40 template <>
41 class SkNi<2, int64_t> {
42 public:
43 SkNi(const __m128i& vec) : fVec(vec) {}
44
45 SkNi() {}
46 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); }
47 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); }
48
49 private:
50 __m128i fVec;
51 };
52
53
54 template <>
55 class SkNf<2, float> {
56 typedef SkNi<2, int32_t> Ni;
57 public:
58 SkNf(const __m128& vec) : fVec(vec) {}
59
60 SkNf() {}
61 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {}
62 static SkNf Load(const float vals[2]) {
63 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals));
64 }
65 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {}
66
67 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); }
68
69 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); }
70 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); }
71 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); }
72 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); }
73
74 Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); }
75 Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps (fVec, o.fVec)); }
76 Ni operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); }
77 Ni operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); }
78 Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); }
79 Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); }
80
81 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r. fVec); }
82 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r. fVec); }
83
84 SkNf sqrt() const { return _mm_sqrt_ps (fVec); }
85 SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); }
86
87 SkNf invert() const { return SkNf(1) / *this; }
88 SkNf approxInvert() const { return _mm_rcp_ps(fVec); }
89
90 float operator[] (int k) const {
91 SkASSERT(0 <= k && k < 2);
92 union { __m128 v; float fs[4]; } pun = {fVec};
93 return pun.fs[k];
94 }
95
96 private:
97 __m128 fVec;
98 };
99
100 template <>
101 class SkNf<2, double> {
102 typedef SkNi<2, int64_t> Ni;
103 public:
104 SkNf(const __m128d& vec) : fVec(vec) {}
105
106 SkNf() {}
107 explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {}
108 static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); }
109 SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {}
110
111 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); }
112
113 SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); }
114 SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); }
115 SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); }
116 SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); }
117
118 Ni operator == (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpeq_pd (fVec, o.fVec)); }
119 Ni operator != (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpneq_pd (fVec, o.fVec)); }
120 Ni operator < (const SkNf& o) const { return _mm_castpd_si128(_mm_cmplt_pd (fVec, o.fVec)); }
121 Ni operator > (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpgt_pd (fVec, o.fVec)); }
122 Ni operator <= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmple_pd (fVec, o.fVec)); }
123 Ni operator >= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpge_pd (fVec, o.fVec)); }
124
125 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r. fVec); }
126 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r. fVec); }
127
128 SkNf sqrt() const { return _mm_sqrt_pd(fVec); }
129 SkNf rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); }
130
131 SkNf invert() const { return SkNf(1) / *this; }
132 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec ))); }
133
134 double operator[] (int k) const {
135 SkASSERT(0 <= k && k < 2);
136 union { __m128d v; double ds[2]; } pun = {fVec};
137 return pun.ds[k];
138 }
139
140 private:
141 __m128d fVec;
142 };
143
144 template <>
145 class SkNf<4, float> {
146 typedef SkNi<4, int32_t> Ni;
147 public:
148 SkNf(const __m128& vec) : fVec(vec) {}
149 __m128 vec() const { return fVec; }
150
151 SkNf() {}
152 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {}
153 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); }
154 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}
155
156 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }
157
158 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); }
159 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); }
160 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); }
161 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); }
162
163 Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); }
164 Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps (fVec, o.fVec)); }
165 Ni operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); }
166 Ni operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); }
167 Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); }
168 Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); }
169
170 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r. fVec); }
171 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r. fVec); }
172
173 SkNf sqrt() const { return _mm_sqrt_ps (fVec); }
174 SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); }
175
176 SkNf invert() const { return SkNf(1) / *this; }
177 SkNf approxInvert() const { return _mm_rcp_ps(fVec); }
178
179 float operator[] (int k) const {
180 SkASSERT(0 <= k && k < 4);
181 union { __m128 v; float fs[4]; } pun = {fVec};
182 return pun.fs[k];
183 }
184
185 private:
186 __m128 fVec;
187 };
188
189
190 #endif//SkNx_sse_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | src/opts/SkPMFloat_SSE2.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698