Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(120)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 1526923003: Specialize Sk2d for SSE2 (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: fix anyTrue Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkNx.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_sse_DEFINED 8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED 9 #define SkNx_sse_DEFINED
10 10
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
58 return pun.fs[k&1]; 58 return pun.fs[k&1];
59 } 59 }
60 60
61 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } 61 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); }
62 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } 62 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); }
63 63
64 __m128 fVec; 64 __m128 fVec;
65 }; 65 };
66 66
67 template <> 67 template <>
// Specialization of SkNx for two doubles, held in a single __m128d (fVec).
// Every operation below is a thin wrapper over the matching SSE2 *_pd intrinsic.
68 class SkNx<2, double> {
69 public:
// Wraps an existing __m128d; non-explicit, so the intrinsic results returned
// by the methods below convert to SkNx implicitly.
70 SkNx(const __m128d& vec) : fVec(vec) {}
71
// Default constructor has an empty body and no initializer: fVec is left uninitialized.
72 SkNx() {}
// Broadcasts val into both lanes.
73 SkNx(double val) : fVec(_mm_set1_pd(val)) {}
// Loads two doubles from vals with the unaligned-load intrinsic.
74 static SkNx Load(const double vals[2]) { return _mm_loadu_pd(vals); }
// _mm_setr_pd takes its arguments in memory order: a goes to lane 0, b to lane 1.
75 SkNx(double a, double b) : fVec(_mm_setr_pd(a,b)) {}
76
// Writes both lanes to vals with the unaligned-store intrinsic.
77 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); }
78
// Lane-wise arithmetic.
79 SkNx operator + (const SkNx& o) const { return _mm_add_pd(fVec, o.fVec); }
80 SkNx operator - (const SkNx& o) const { return _mm_sub_pd(fVec, o.fVec); }
81 SkNx operator * (const SkNx& o) const { return _mm_mul_pd(fVec, o.fVec); }
82 SkNx operator / (const SkNx& o) const { return _mm_div_pd(fVec, o.fVec); }
83
// Lane-wise comparisons. These return an SkNx, not a bool: the _mm_cmp*_pd
// intrinsics yield a per-lane bit mask (all bits set where the comparison
// holds, all bits clear where it does not). Reduce with allTrue()/anyTrue()
// or feed the mask to thenElse().
84 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_pd (fVec, o.fVec); }
85 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_pd(fVec, o.fVec); }
86 SkNx operator < (const SkNx& o) const { return _mm_cmplt_pd (fVec, o.fVec); }
87 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_pd (fVec, o.fVec); }
88 SkNx operator <= (const SkNx& o) const { return _mm_cmple_pd (fVec, o.fVec); }
89 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_pd (fVec, o.fVec); }
90
// Lane-wise minimum / maximum of l and r.
91 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_pd(l.fVec, r. fVec); }
92 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_pd(l.fVec, r. fVec); }
93
// Lane-wise square root.
94 SkNx sqrt() const { return _mm_sqrt_pd(fVec); }
95
// Returns lane k (0 or 1) as a scalar double.
96 template <int k> double kth() const {
97 SkASSERT(0 <= k && k < 2);
// Read the lanes back through a union overlaying the vector with double[2].
98 union { __m128d v; double fs[2]; } pun = {fVec};
// k&1 keeps the index within fs[0..1] independently of the assert above.
99 return pun.fs[k&1];
100 }
101
// _mm_movemask_pd packs the sign bit of each lane into bits 0 and 1 of an int:
// 0x3 means both lanes have the sign bit set, non-zero means at least one does.
// NOTE(review): presumably meant to be called on comparison masks, where the
// sign bit matches the rest of the lane -- confirm against callers.
102 bool allTrue() const { return 0x3 == _mm_movemask_pd(fVec); }
103 bool anyTrue() const { return 0x0 != _mm_movemask_pd(fVec); }
104
// Bitwise select: (fVec & t) | (~fVec & e). Each bit of the result comes from
// t where the corresponding bit of fVec is set and from e where it is clear.
105 SkNx thenElse(const SkNx& t, const SkNx& e) const {
106 return _mm_or_pd(_mm_and_pd (fVec, t.fVec),
107 _mm_andnot_pd(fVec, e.fVec));
108 }
109
// The two packed doubles.
110 __m128d fVec;
111 };
112
113 template <>
68 class SkNx<4, int> { 114 class SkNx<4, int> {
69 public: 115 public:
70 SkNx(const __m128i& vec) : fVec(vec) {} 116 SkNx(const __m128i& vec) : fVec(vec) {}
71 117
72 SkNx() {} 118 SkNx() {}
73 SkNx(int val) : fVec(_mm_set1_epi32(val)) {} 119 SkNx(int val) : fVec(_mm_set1_epi32(val)) {}
74 static SkNx Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); } 120 static SkNx Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); }
75 SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} 121 SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {}
76 122
77 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } 123 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after
328 _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec), 374 _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec),
329 _mm_cvttps_epi32(b.fVec)) , 375 _mm_cvttps_epi32(b.fVec)) ,
330 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec), 376 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec),
331 _mm_cvttps_epi32(d.fVec)) )); 377 _mm_cvttps_epi32(d.fVec)) ));
332 } 378 }
333 379
334 380
335 } // namespace 381 } // namespace
336 382
337 #endif//SkNx_sse_DEFINED 383 #endif//SkNx_sse_DEFINED
OLD NEW
« no previous file with comments | « src/core/SkNx.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698