Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(116)

Side by Side Diff: src/opts/SkPx_neon.h

Issue 1436513002: SkPx: use namespaces as namespaces (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/opts/SkPx_none.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkPx_neon_DEFINED 8 #ifndef SkPx_neon_DEFINED
9 #define SkPx_neon_DEFINED 9 #define SkPx_neon_DEFINED
10 10
11 // When we have NEON, we like to work 8 pixels at a time. 11 // When we have NEON, we like to work 8 pixels at a time.
12 // This lets us exploit vld4/vst4 and represent SkPx as planar uint8x8x4_t, 12 // This lets us exploit vld4/vst4 and represent SkPx as planar uint8x8x4_t,
13 // Wide as planar uint16x8x4_t, and Alpha as a single uint8x8_t plane. 13 // Wide as planar uint16x8x4_t, and Alpha as a single uint8x8_t plane.
14 14
15 struct SkPx_neon { 15 namespace neon {
16
17 struct SkPx {
16 static const int N = 8; 18 static const int N = 8;
17 19
18 uint8x8x4_t fVec; 20 uint8x8x4_t fVec;
19 SkPx_neon(uint8x8x4_t vec) : fVec(vec) {} 21 SkPx(uint8x8x4_t vec) : fVec(vec) {}
20 22
21 static SkPx_neon Dup(uint32_t px) { return vld4_dup_u8((const uint8_t*)&px); } 23 static SkPx Dup(uint32_t px) { return vld4_dup_u8((const uint8_t*)&px); }
22 static SkPx_neon Load(const uint32_t* px) { return vld4_u8((const uint8_t*)px); } 24 static SkPx Load(const uint32_t* px) { return vld4_u8((const uint8_t*)px); }
23 static SkPx_neon Load(const uint32_t* px, int n) { 25 static SkPx Load(const uint32_t* px, int n) {
24 SkASSERT(0 < n && n < 8); 26 SkASSERT(0 < n && n < 8);
25 uint8x8x4_t v = vld4_dup_u8((const uint8_t*)px); // n>=1, so start all lanes with pixel 0. 27 uint8x8x4_t v = vld4_dup_u8((const uint8_t*)px); // n>=1, so start all lanes with pixel 0.
26 switch (n) { 28 switch (n) {
27 case 7: v = vld4_lane_u8((const uint8_t*)(px+6), v, 6); // fall through 29 case 7: v = vld4_lane_u8((const uint8_t*)(px+6), v, 6); // fall through
28 case 6: v = vld4_lane_u8((const uint8_t*)(px+5), v, 5); // fall through 30 case 6: v = vld4_lane_u8((const uint8_t*)(px+5), v, 5); // fall through
29 case 5: v = vld4_lane_u8((const uint8_t*)(px+4), v, 4); // fall through 31 case 5: v = vld4_lane_u8((const uint8_t*)(px+4), v, 4); // fall through
30 case 4: v = vld4_lane_u8((const uint8_t*)(px+3), v, 3); // fall through 32 case 4: v = vld4_lane_u8((const uint8_t*)(px+3), v, 3); // fall through
31 case 3: v = vld4_lane_u8((const uint8_t*)(px+2), v, 2); // fall through 33 case 3: v = vld4_lane_u8((const uint8_t*)(px+2), v, 2); // fall through
32 case 2: v = vld4_lane_u8((const uint8_t*)(px+1), v, 1); 34 case 2: v = vld4_lane_u8((const uint8_t*)(px+1), v, 1);
33 } 35 }
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
101 } 103 }
102 template <int bits> Wide shr() const { 104 template <int bits> Wide shr() const {
103 return (uint16x8x4_t) {{ 105 return (uint16x8x4_t) {{
104 vshrq_n_u16(fVec.val[0], bits), 106 vshrq_n_u16(fVec.val[0], bits),
105 vshrq_n_u16(fVec.val[1], bits), 107 vshrq_n_u16(fVec.val[1], bits),
106 vshrq_n_u16(fVec.val[2], bits), 108 vshrq_n_u16(fVec.val[2], bits),
107 vshrq_n_u16(fVec.val[3], bits), 109 vshrq_n_u16(fVec.val[3], bits),
108 }}; 110 }};
109 } 111 }
110 112
111 SkPx_neon addNarrowHi(const SkPx_neon& o) const { 113 SkPx addNarrowHi(const SkPx& o) const {
112 return (uint8x8x4_t) {{ 114 return (uint8x8x4_t) {{
113 vshrn_n_u16(vaddw_u8(fVec.val[0], o.fVec.val[0]), 8), 115 vshrn_n_u16(vaddw_u8(fVec.val[0], o.fVec.val[0]), 8),
114 vshrn_n_u16(vaddw_u8(fVec.val[1], o.fVec.val[1]), 8), 116 vshrn_n_u16(vaddw_u8(fVec.val[1], o.fVec.val[1]), 8),
115 vshrn_n_u16(vaddw_u8(fVec.val[2], o.fVec.val[2]), 8), 117 vshrn_n_u16(vaddw_u8(fVec.val[2], o.fVec.val[2]), 8),
116 vshrn_n_u16(vaddw_u8(fVec.val[3], o.fVec.val[3]), 8), 118 vshrn_n_u16(vaddw_u8(fVec.val[3], o.fVec.val[3]), 8),
117 }}; 119 }};
118 } 120 }
119 }; 121 };
120 122
121 Alpha alpha() const { return fVec.val[3]; } 123 Alpha alpha() const { return fVec.val[3]; }
122 124
123 Wide widenLo() const { 125 Wide widenLo() const {
124 return (uint16x8x4_t) {{ 126 return (uint16x8x4_t) {{
125 vmovl_u8(fVec.val[0]), 127 vmovl_u8(fVec.val[0]),
126 vmovl_u8(fVec.val[1]), 128 vmovl_u8(fVec.val[1]),
127 vmovl_u8(fVec.val[2]), 129 vmovl_u8(fVec.val[2]),
128 vmovl_u8(fVec.val[3]), 130 vmovl_u8(fVec.val[3]),
129 }}; 131 }};
130 } 132 }
131 // TODO: these two can probably be done faster. 133 // TODO: these two can probably be done faster.
132 Wide widenHi() const { return this->widenLo().shl<8>(); } 134 Wide widenHi() const { return this->widenLo().shl<8>(); }
133 Wide widenLoHi() const { return this->widenLo() + this->widenHi(); } 135 Wide widenLoHi() const { return this->widenLo() + this->widenHi(); }
134 136
135 SkPx_neon operator+(const SkPx_neon& o) const { 137 SkPx operator+(const SkPx& o) const {
136 return (uint8x8x4_t) {{ 138 return (uint8x8x4_t) {{
137 vadd_u8(fVec.val[0], o.fVec.val[0]), 139 vadd_u8(fVec.val[0], o.fVec.val[0]),
138 vadd_u8(fVec.val[1], o.fVec.val[1]), 140 vadd_u8(fVec.val[1], o.fVec.val[1]),
139 vadd_u8(fVec.val[2], o.fVec.val[2]), 141 vadd_u8(fVec.val[2], o.fVec.val[2]),
140 vadd_u8(fVec.val[3], o.fVec.val[3]), 142 vadd_u8(fVec.val[3], o.fVec.val[3]),
141 }}; 143 }};
142 } 144 }
143 SkPx_neon operator-(const SkPx_neon& o) const { 145 SkPx operator-(const SkPx& o) const {
144 return (uint8x8x4_t) {{ 146 return (uint8x8x4_t) {{
145 vsub_u8(fVec.val[0], o.fVec.val[0]), 147 vsub_u8(fVec.val[0], o.fVec.val[0]),
146 vsub_u8(fVec.val[1], o.fVec.val[1]), 148 vsub_u8(fVec.val[1], o.fVec.val[1]),
147 vsub_u8(fVec.val[2], o.fVec.val[2]), 149 vsub_u8(fVec.val[2], o.fVec.val[2]),
148 vsub_u8(fVec.val[3], o.fVec.val[3]), 150 vsub_u8(fVec.val[3], o.fVec.val[3]),
149 }}; 151 }};
150 } 152 }
151 SkPx_neon saturatedAdd(const SkPx_neon& o) const { 153 SkPx saturatedAdd(const SkPx& o) const {
152 return (uint8x8x4_t) {{ 154 return (uint8x8x4_t) {{
153 vqadd_u8(fVec.val[0], o.fVec.val[0]), 155 vqadd_u8(fVec.val[0], o.fVec.val[0]),
154 vqadd_u8(fVec.val[1], o.fVec.val[1]), 156 vqadd_u8(fVec.val[1], o.fVec.val[1]),
155 vqadd_u8(fVec.val[2], o.fVec.val[2]), 157 vqadd_u8(fVec.val[2], o.fVec.val[2]),
156 vqadd_u8(fVec.val[3], o.fVec.val[3]), 158 vqadd_u8(fVec.val[3], o.fVec.val[3]),
157 }}; 159 }};
158 } 160 }
159 161
160 Wide operator*(const Alpha& a) const { 162 Wide operator*(const Alpha& a) const {
161 return (uint16x8x4_t) {{ 163 return (uint16x8x4_t) {{
162 vmull_u8(fVec.val[0], a.fA), 164 vmull_u8(fVec.val[0], a.fA),
163 vmull_u8(fVec.val[1], a.fA), 165 vmull_u8(fVec.val[1], a.fA),
164 vmull_u8(fVec.val[2], a.fA), 166 vmull_u8(fVec.val[2], a.fA),
165 vmull_u8(fVec.val[3], a.fA), 167 vmull_u8(fVec.val[3], a.fA),
166 }}; 168 }};
167 } 169 }
168 SkPx_neon approxMulDiv255(const Alpha& a) const { 170 SkPx approxMulDiv255(const Alpha& a) const {
169 return (*this * a).addNarrowHi(*this); 171 return (*this * a).addNarrowHi(*this);
170 } 172 }
171 173
172 SkPx_neon addAlpha(const Alpha& a) const { 174 SkPx addAlpha(const Alpha& a) const {
173 return (uint8x8x4_t) {{ 175 return (uint8x8x4_t) {{
174 fVec.val[0], 176 fVec.val[0],
175 fVec.val[1], 177 fVec.val[1],
176 fVec.val[2], 178 fVec.val[2],
177 vadd_u8(fVec.val[3], a.fA), 179 vadd_u8(fVec.val[3], a.fA),
178 }}; 180 }};
179 } 181 }
180 }; 182 };
181 typedef SkPx_neon SkPx; 183
184 } // namespace neon
185
186 typedef neon::SkPx SkPx;
182 187
183 #endif//SkPx_neon_DEFINED 188 #endif//SkPx_neon_DEFINED
OLDNEW
« no previous file with comments | « no previous file | src/opts/SkPx_none.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698