Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1093)

Side by Side Diff: third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h

Issue 2304183002: Add MSA (MIPS SIMD Arch) optimized WebGL image conversion functions (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CommonMacrosMSA_h 5 #ifndef CommonMacrosMSA_h
6 #define CommonMacrosMSA_h 6 #define CommonMacrosMSA_h
7 7
8 #include <msa.h> 8 #include <msa.h>
9 #include <stdint.h> 9 #include <stdint.h>
10 10
#if defined(__clang__)
#define CLANG_BUILD
#endif

/* Clang exposes these MSA operations only through builtins, while GCC also
 * supports them via its generic vector-extension operators.  All macro
 * arguments are parenthesized so callers may pass expressions
 * (e.g. CEQI_B(a & m, 0)) without operator-precedence surprises. */
#ifdef CLANG_BUILD
#define SRLI_B(a, b) __msa_srli_b((v16i8)(a), (b))
#define SRLI_H(a, b) __msa_srli_h((v8i16)(a), (b))
#define SLLI_B(a, b) __msa_slli_b((v16i8)(a), (b))
#define SLLI_H(a, b) __msa_slli_h((v8i16)(a), (b))
#define CEQI_B(a, b) __msa_ceqi_b((v16i8)(a), (b))
#define CEQI_H(a, b) __msa_ceqi_h((v8i16)(a), (b))
#define ANDI_B(a, b) __msa_andi_b((v16u8)(a), (b))
#else
#define SRLI_B(a, b) ((v16u8)(a) >> (b))
#define SRLI_H(a, b) ((v8u16)(a) >> (b))
#define SLLI_B(a, b) ((v16i8)(a) << (b))
#define SLLI_H(a, b) ((v8i16)(a) << (b))
#define CEQI_B(a, b) ((a) == (b))
#define CEQI_H(a, b) ((a) == (b))
#define ANDI_B(a, b) ((v16u8)(a) & (b))
#endif
24 32
/* Load one vector of type RTYPE from (psrc).  The expansion is fully
 * parenthesized so the loaded value can safely be used inside larger
 * expressions (e.g. indexing or a unary operator) without mis-parsing.
 * NOTE(review): the pointer cast assumes the MSA vector types may alias
 * the source buffer's type — confirm for this toolchain. */
#define LD_V(RTYPE, psrc) (*((RTYPE*)(psrc)))
#define LD_UB(...) LD_V(v16u8, __VA_ARGS__)
#define LD_UH(...) LD_V(v8u16, __VA_ARGS__)
#define LD_SP(...) LD_V(v4f32, __VA_ARGS__)
#define LD_DP(...) LD_V(v2f64, __VA_ARGS__)

/* Store vector 'in' of type RTYPE to (pdst). */
#define ST_V(RTYPE, in, pdst) (*((RTYPE*)(pdst)) = (in))
#define ST_UB(...) ST_V(v16u8, __VA_ARGS__)
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
127 135
/* Load 4 / 6 / 8 consecutive vectors starting at (psrc); 'psrc' is
 * advanced by 'stride' between loads (see LD_V2), so it must be an
 * lvalue pointer. */
#define LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \
  {                                                        \
    LD_V2(RTYPE, psrc, stride, out0, out1);                \
    LD_V2(RTYPE, psrc, stride, out2, out3);                \
  }
#define LD_UB4(...) LD_V4(v16u8, __VA_ARGS__)
#define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__)
#define LD_SP4(...) LD_V4(v4f32, __VA_ARGS__)

#define LD_V6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \
  {                                                                    \
    LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3);                \
    LD_V2(RTYPE, psrc, stride, out4, out5);                            \
  }
#define LD_UB6(...) LD_V6(v16u8, __VA_ARGS__)
#define LD_UH6(...) LD_V6(v8u16, __VA_ARGS__)
#define LD_SP6(...) LD_V6(v4f32, __VA_ARGS__)

#define LD_V8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \
              out7)                                                          \
  {                                                                          \
    LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3);                      \
    LD_V4(RTYPE, psrc, stride, out4, out5, out6, out7);                      \
  }
#define LD_UB8(...) LD_V8(v16u8, __VA_ARGS__)
#define LD_UH8(...) LD_V8(v8u16, __VA_ARGS__)
#define LD_SP8(...) LD_V8(v4f32, __VA_ARGS__)
#define LD_DP8(...) LD_V8(v2f64, __VA_ARGS__)
163
137 /* Description : Store vectors of elements with stride 164 /* Description : Store vectors of elements with stride
138 * Arguments : Inputs - in0, in1, pdst, stride 165 * Arguments : Inputs - in0, in1, pdst, stride
139 * Details : Store elements from 'in0' to (pdst) 166 * Details : Store elements from 'in0' to (pdst)
140 * Store elements from 'in1' to (pdst + stride) 167 * Store elements from 'in1' to (pdst + stride)
141 */ 168 */
142 #define ST_V2(RTYPE, in0, in1, pdst, stride) \ 169 #define ST_V2(RTYPE, in0, in1, pdst, stride) \
143 { \ 170 { \
144 ST_V(RTYPE, in0, pdst); \ 171 ST_V(RTYPE, in0, pdst); \
145 pdst += stride; \ 172 pdst += stride; \
146 ST_V(RTYPE, in1, pdst); \ 173 ST_V(RTYPE, in1, pdst); \
(...skipping 13 matching lines...) Expand all
#define ST_UH3(...) ST_V3(v8u16, __VA_ARGS__)

/* Store 4 / 6 / 8 vectors to (pdst); the pointer is advanced by
 * 'stride' after each store (see ST_V2 / ST_V3). */
#define ST_V4(RTYPE, v0, v1, v2, v3, pdst, stride) \
  {                                                \
    ST_V2(RTYPE, v0, v1, pdst, stride);            \
    ST_V2(RTYPE, v2, v3, pdst, stride);            \
  }
#define ST_UB4(...) ST_V4(v16u8, __VA_ARGS__)
#define ST_UH4(...) ST_V4(v8u16, __VA_ARGS__)
#define ST_SP4(...) ST_V4(v4f32, __VA_ARGS__)

#define ST_V6(RTYPE, v0, v1, v2, v3, v4, v5, pdst, stride) \
  {                                                        \
    ST_V3(RTYPE, v0, v1, v2, pdst, stride);                \
    ST_V3(RTYPE, v3, v4, v5, pdst, stride);                \
  }
#define ST_UB6(...) ST_V6(v16u8, __VA_ARGS__)
#define ST_SP6(...) ST_V6(v4f32, __VA_ARGS__)

#define ST_V8(RTYPE, v0, v1, v2, v3, v4, v5, v6, v7, pdst, stride) \
  {                                                                \
    ST_V4(RTYPE, v0, v1, v2, v3, pdst, stride);                    \
    ST_V4(RTYPE, v4, v5, v6, v7, pdst, stride);                    \
  }
#define ST_UB8(...) ST_V8(v16u8, __VA_ARGS__)
#define ST_SP8(...) ST_V8(v4f32, __VA_ARGS__)
185 213
/* Description : Bitwise AND of input vectors with a mask vector
   Arguments   : Inputs  - in0, in1 (, in2, in3), mask
                 Outputs - out0, out1 (, out2, out3)
                 Return Type - as per RTYPE
   Details     : Each input vector is AND'ed with 'mask' and the result is
                 written to the corresponding output vector.
*/
#define AND_V2(RTYPE, in0, in1, mask, out0, out1)           \
  {                                                         \
    out0 = (RTYPE)__msa_and_v((v16u8)(in0), (v16u8)(mask)); \
    out1 = (RTYPE)__msa_and_v((v16u8)(in1), (v16u8)(mask)); \
  }
#define AND_V2_UB(...) AND_V2(v16u8, __VA_ARGS__)

#define AND_V4(RTYPE, in0, in1, in2, in3, mask, out0, out1, out2, out3) \
  {                                                                     \
    AND_V2(RTYPE, in0, in1, mask, out0, out1);                          \
    AND_V2(RTYPE, in2, in3, mask, out2, out3);                          \
  }
#define AND_V4_UB(...) AND_V4(v16u8, __VA_ARGS__)
235
/* Description : Compare byte vector elements for equality with an immediate
   Arguments   : Inputs  - in0, in1 (, in2, in3), val
                 Outputs - out0, out1 (, out2, out3)
                 Return Type - as per RTYPE
   Details     : Each byte element of the input vectors is compared with
                 the immediate 'val'; elements that match produce all-ones
                 in the corresponding output element, others produce zero
                 (see CEQI_B above).
*/
#define CEQI_B2(RTYPE, in0, in1, val, out0, out1) \
  {                                               \
    out0 = CEQI_B(in0, val);                      \
    out1 = CEQI_B(in1, val);                      \
  }
#define CEQI_B2_UB(...) CEQI_B2(v16u8, __VA_ARGS__)

#define CEQI_B4(RTYPE, in0, in1, in2, in3, val, out0, out1, out2, out3) \
  {                                                                     \
    CEQI_B2(RTYPE, in0, in1, val, out0, out1);                          \
    CEQI_B2(RTYPE, in2, in3, val, out2, out3);                          \
  }
#define CEQI_B4_UB(...) CEQI_B4(v16u8, __VA_ARGS__)
257
/* Description : Immediate number of elements to slide
   Arguments   : Inputs - in0, in1, slide_val
                 Return Type - as per RTYPE
   Details     : Byte elements from 'in1' are slid into 'in0' by the
                 immediate 'slide_val'.  The expansion is fully
                 parenthesized so the result can be used inside larger
                 expressions.
*/
#define SLDI_B(RTYPE, in0, in1, slide_val) \
  ((RTYPE)__msa_sldi_b((v16i8)(in0), (v16i8)(in1), (slide_val)))
#define SLDI_UB(...) SLDI_B(v16u8, __VA_ARGS__)
#define SLDI_D(...) SLDI_B(v2f64, __VA_ARGS__)

/* Description : Immediate number of elements to slide (two-vector form)
   Arguments   : Inputs  - in0_0, in0_1, in1_0, in1_1, slide_val
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : out0/out1 receive the slid combination of the
                 (in0_x, in1_x) pairs, as in SLDI_B.
*/
#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \
  {                                                                       \
    out0 = SLDI_B(RTYPE, in0_0, in1_0, slide_val);                        \
    out1 = SLDI_B(RTYPE, in0_1, in1_1, slide_val);                        \
  }
#define SLDI_B2_UB(...) SLDI_B2(v16u8, __VA_ARGS__)
283
/* Description : Shuffle byte vector elements as per immediate control
   Arguments   : Inputs  - in0, in1 (, in2, in3), shf_val
                 Outputs - in0, in1 (, in2, in3) — operation is in place
   Return Type - as per RTYPE
   Details     : Byte elements of each input vector are reordered according
                 to the immediate control 'shf_val' and the result
                 overwrites the input vector.
*/
#define SHF_B2(RTYPE, in0, in1, shf_val)               \
  {                                                    \
    in0 = (RTYPE)__msa_shf_b((v16i8)(in0), (shf_val)); \
    in1 = (RTYPE)__msa_shf_b((v16i8)(in1), (shf_val)); \
  }
#define SHF_B2_UB(...) SHF_B2(v16u8, __VA_ARGS__)
#define SHF_B2_UH(...) SHF_B2(v8u16, __VA_ARGS__)

#define SHF_B4(RTYPE, in0, in1, in2, in3, shf_val) \
  {                                                \
    SHF_B2(RTYPE, in0, in1, shf_val);              \
    SHF_B2(RTYPE, in2, in3, shf_val);              \
  }
#define SHF_B4_UB(...) SHF_B4(v16u8, __VA_ARGS__)
#define SHF_B4_UH(...) SHF_B4(v8u16, __VA_ARGS__)
306
/* Description : Interleave even byte elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Even byte elements of 'in0' and 'in1' are interleaved
                 and written to 'out0'; likewise 'in2'/'in3' into 'out1'.
*/
#define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1)      \
  {                                                          \
    out0 = (RTYPE)__msa_ilvev_b((v16i8)(in1), (v16i8)(in0)); \
    out1 = (RTYPE)__msa_ilvev_b((v16i8)(in3), (v16i8)(in2)); \
  }
#define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__)
#define ILVEV_B2_UH(...) ILVEV_B2(v8u16, __VA_ARGS__)

#define ILVEV_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
  {                                                                     \
    ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1);                    \
    out2 = (RTYPE)__msa_ilvev_b((v16i8)(in5), (v16i8)(in4));            \
  }
#define ILVEV_B3_UH(...) ILVEV_B3(v8u16, __VA_ARGS__)

/* Description : Interleave even halfword elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Even halfword elements of 'in0' and 'in1' are interleaved
                 and written to 'out0'; likewise 'in2'/'in3' into 'out1'.
*/
#define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1)      \
  {                                                          \
    out0 = (RTYPE)__msa_ilvev_h((v8i16)(in1), (v8i16)(in0)); \
    out1 = (RTYPE)__msa_ilvev_h((v8i16)(in3), (v8i16)(in2)); \
  }
#define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__)
199 342
/* Description : Interleave both right and left half of input vectors
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Right half of byte elements from 'in0' and 'in1' are
                 interleaved and written to 'out0'; the left halves are
                 interleaved into 'out1'.
*/
#define ILVRL_B2(RTYPE, in0, in1, out0, out1)             \
  {                                                       \
    out0 = (RTYPE)__msa_ilvr_b((v16i8)(in0), (v16i8)(in1)); \
    out1 = (RTYPE)__msa_ilvl_b((v16i8)(in0), (v16i8)(in1)); \
  }
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)

/* Halfword variant of ILVRL_B2. */
#define ILVRL_H2(RTYPE, in0, in1, out0, out1)             \
  {                                                       \
    out0 = (RTYPE)__msa_ilvr_h((v8i16)(in0), (v8i16)(in1)); \
    out1 = (RTYPE)__msa_ilvl_h((v8i16)(in0), (v8i16)(in1)); \
  }
#define ILVRL_H2_UB(...) ILVRL_H2(v16u8, __VA_ARGS__)

/* Description : Interleave both odd and even half of input vectors
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Odd byte elements from 'in0' and 'in1' are interleaved
                 and written to 'out0'; even byte elements into 'out1'.
*/
#define ILVODEV_B2(RTYPE, in0, in1, out0, out1)             \
  {                                                         \
    out0 = (RTYPE)__msa_ilvod_b((v16i8)(in0), (v16i8)(in1)); \
    out1 = (RTYPE)__msa_ilvev_b((v16i8)(in0), (v16i8)(in1)); \
  }
#define ILVODEV_B2_UB(...) ILVODEV_B2(v16u8, __VA_ARGS__)
377
/* Description : Pack even halfword elements of vector pairs
   Arguments   : Inputs  - in0, in1, ...
                 Outputs - out0, out1, ...
                 Return Type - as per RTYPE
   Details     : Even halfword elements of 'in0' are copied to the left half
                 of 'out0' & even halfword elements of 'in1' are copied to
                 the right half of 'out0'.
*/
#define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1)      \
  {                                                          \
    out0 = (RTYPE)__msa_pckev_h((v8i16)(in0), (v8i16)(in1)); \
    out1 = (RTYPE)__msa_pckev_h((v8i16)(in2), (v8i16)(in3)); \
  }
#define PCKEV_H2_UB(...) PCKEV_H2(v16u8, __VA_ARGS__)

#define PCKEV_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
  {                                                                     \
    PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1);                    \
    out2 = (RTYPE)__msa_pckev_h((v8i16)(in4), (v8i16)(in5));            \
  }
#define PCKEV_H3_UB(...) PCKEV_H3(v16u8, __VA_ARGS__)

#define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
                 out2, out3)                                                \
  {                                                                         \
    PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1);                        \
    PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3);                        \
  }
#define PCKEV_H4_UB(...) PCKEV_H4(v16u8, __VA_ARGS__)

/* Description : Pack odd halfword elements of vector pairs
   Arguments   : Inputs  - in0, in1, ...
                 Outputs - out0, out1, ...
                 Return Type - as per RTYPE
   Details     : Odd halfword elements of 'in0' are copied to the left half
                 of 'out0' & odd halfword elements of 'in1' are copied to
                 the right half of 'out0'.
*/
#define PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1)      \
  {                                                          \
    out0 = (RTYPE)__msa_pckod_h((v8i16)(in0), (v8i16)(in1)); \
    out1 = (RTYPE)__msa_pckod_h((v8i16)(in2), (v8i16)(in3)); \
  }
#define PCKOD_H2_UB(...) PCKOD_H2(v16u8, __VA_ARGS__)

#define PCKOD_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
  {                                                                     \
    PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1);                    \
    out2 = (RTYPE)__msa_pckod_h((v8i16)(in4), (v8i16)(in5));            \
  }
#define PCKOD_H3_UB(...) PCKOD_H3(v16u8, __VA_ARGS__)

#define PCKOD_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
                 out2, out3)                                                \
  {                                                                         \
    PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1);                        \
    PCKOD_H2(RTYPE, in4, in5, in6, in7, out2, out3);                        \
  }
#define PCKOD_H4_UB(...) PCKOD_H4(v16u8, __VA_ARGS__)
435
/* Description : Logical shift right all elements of byte vectors
   Arguments   : Inputs  - in0, in1 (, in2, in3), shift_val
                 Outputs - in place operation
                 Return Type - as per input vector RTYPE
   Details     : Each byte element of the input vectors is right shifted by
                 the immediate 'shift_val' (see SRLI_B) and the result is
                 written in place.
*/
#define SRLI_B2(RTYPE, in0, in1, shift_val) \
  {                                         \
    in0 = (RTYPE)SRLI_B(in0, shift_val);    \
    in1 = (RTYPE)SRLI_B(in1, shift_val);    \
  }
#define SRLI_B2_UB(...) SRLI_B2(v16u8, __VA_ARGS__)

#define SRLI_B3(RTYPE, in0, in1, in2, shift_val) \
  {                                              \
    SRLI_B2(RTYPE, in0, in1, shift_val);         \
    in2 = (RTYPE)SRLI_B(in2, shift_val);         \
  }
#define SRLI_B3_UB(...) SRLI_B3(v16u8, __VA_ARGS__)

#define SRLI_B4(RTYPE, in0, in1, in2, in3, shift_val) \
  {                                                   \
    SRLI_B2(RTYPE, in0, in1, shift_val);              \
    SRLI_B2(RTYPE, in2, in3, shift_val);              \
  }
#define SRLI_B4_UB(...) SRLI_B4(v16u8, __VA_ARGS__)
463
/* Description : Immediate Bit Insert Right (immediate)
   Arguments   : Inputs  - in0, in1 (, in2..in5), shift
                 Outputs - out0, out1 (, out2)
                 Return Type - as per RTYPE
   Details     : Copy least significant (right) bits in each element of
                 vector 'in1' to elements in vector 'in0' while preserving
                 the most significant (left) bits.  The number of bits to
                 copy is given by the immediate 'shift + 1'.
*/
#define BINSRI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift)      \
  {                                                                  \
    out0 = (RTYPE)__msa_binsri_b((v16u8)(in0), (v16u8)(in1), (shift)); \
    out1 = (RTYPE)__msa_binsri_b((v16u8)(in2), (v16u8)(in3), (shift)); \
  }
#define BINSRI_B2_UB(...) BINSRI_B2(v16u8, __VA_ARGS__)

#define BINSRI_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2, \
                  shift)                                                 \
  {                                                                     \
    BINSRI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift);            \
    out2 = (RTYPE)__msa_binsri_b((v16u8)(in4), (v16u8)(in5), (shift));  \
  }
#define BINSRI_B3_UB(...) BINSRI_B3(v16u8, __VA_ARGS__)
486
/* Description : Multiplication of pairs of vectors
   Arguments   : Inputs  - in0, in1, in2, in3 (, in4..in7)
                 Outputs - out0, out1 (, out2, out3)
   Details     : Each element from 'in0' is multiplied with elements from
                 'in1' and the result is written to 'out0'.  Arguments are
                 parenthesized so expressions (e.g. MUL2(a + b, c, ...))
                 multiply as intended.
*/
#define MUL2(in0, in1, in2, in3, out0, out1) \
  {                                          \
    out0 = (in0) * (in1);                    \
    out1 = (in2) * (in3);                    \
  }
#define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
  {                                                                          \
    MUL2(in0, in1, in2, in3, out0, out1);                                    \
    MUL2(in4, in5, in6, in7, out2, out3);                                    \
  }
503
/* Description : Division of pairs of vectors
   Arguments   : Inputs  - in0, in1, in2, in3 (, in4..in7)
                 Outputs - out0, out1 (, out2, out3)
   Details     : Each element from 'in0' is divided by elements from 'in1'
                 and the result is written to 'out0'.  Arguments are
                 parenthesized so expressions (e.g. DIV2(a + b, c, ...))
                 divide as intended.
*/
#define DIV2(in0, in1, in2, in3, out0, out1) \
  {                                          \
    out0 = (in0) / (in1);                    \
    out1 = (in2) / (in3);                    \
  }
#define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
  {                                                                          \
    DIV2(in0, in1, in2, in3, out0, out1);                                    \
    DIV2(in4, in5, in6, in7, out2, out3);                                    \
  }
520
/* Description : Vector Floating-Point Convert from Unsigned Integer
   Arguments   : Inputs  - in0, in1 (, in2, in3)
                 Outputs - out0, out1 (, out2, out3)
   Details     : Each unsigned integer word element of the inputs is
                 converted to single-precision floating point and written
                 to the corresponding output.
*/
#define FFINTU_W2(RTYPE, in0, in1, out0, out1) \
  {                                            \
    out0 = (RTYPE)__msa_ffint_u_w((v4u32)(in0)); \
    out1 = (RTYPE)__msa_ffint_u_w((v4u32)(in1)); \
  }
#define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__)

#define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                                  \
    FFINTU_W2(RTYPE, in0, in1, out0, out1);                          \
    FFINTU_W2(RTYPE, in2, in3, out2, out3);                          \
  }
#define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__)

/* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer
   Arguments   : Inputs  - in0, in1 (, in2, in3)
                 Outputs - out0, out1 (, out2, out3)
   Details     : Each single-precision element of the inputs is truncated
                 toward zero and converted to an unsigned integer word.
*/
#define FTRUNCU_W2(RTYPE, in0, in1, out0, out1)  \
  {                                              \
    out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)(in0)); \
    out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)(in1)); \
  }
#define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__)

#define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
  {                                                                   \
    FTRUNCU_W2(RTYPE, in0, in1, out0, out1);                          \
    FTRUNCU_W2(RTYPE, in2, in3, out2, out3);                          \
  }
#define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__)
558
214 #endif // CommonMacrosMSA_h 559 #endif // CommonMacrosMSA_h
OLDNEW
« no previous file with comments | « no previous file | third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698