OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 #ifndef __MACROS_MSA_H__ | |
fbarchard1
2016/08/31 01:20:42
unconventional to have a platform specific header
| |
12 #define __MACROS_MSA_H__ | |
13 | |
14 #include <stdint.h> | |
15 #include <msa.h> | |
16 | |
17 #define LD_B(RTYPE, psrc) (*((RTYPE *)(psrc))) /* value of type RTYPE read through psrc; whole expansion parenthesized */ | |
18 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) /* LD_B with RTYPE fixed to v16u8 */ | |
19 | |
20 #define ST_B(RTYPE, in, pdst) (*((RTYPE *)(pdst)) = (in)) /* assigns 'in' through pdst viewed as RTYPE*; parenthesized as one expression */ | |
fbarchard1
2016/08/31 01:20:42
no space before * in (RTYPE*)
| |
21 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) /* ST_B with RTYPE fixed to v16u8 */ | |
22 | |
23 /* Description : Load two vectors with 16 'byte' sized elements | |
24 Arguments : Inputs - psrc, stride (added to psrc before the RTYPE cast) | |
25 Outputs - out0, out1 | |
26 Return Type - as per RTYPE | |
27 Details : Load 16 byte elements in 'out0' from (psrc) | |
28 Load 16 byte elements in 'out1' from (psrc + stride) | |
29 */ | |
30 #define LD_B2(RTYPE, psrc, stride, out0, out1) \ | |
31 { /* psrc is expanded once per load: pass side-effect-free arguments */ \ | |
32 out0 = LD_B(RTYPE, (psrc)); \ | |
33 out1 = LD_B(RTYPE, (psrc) + (stride)); \ | |
34 } | |
35 #define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__) | |
36 | |
37 #define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \ | |
38 { /* out0/out1 from psrc, out2/out3 from psrc + 2 * stride; stride parenthesized so expression arguments scale correctly */ \ | |
39 LD_B2(RTYPE, (psrc), (stride), out0, out1); \ | |
40 LD_B2(RTYPE, (psrc) + 2 * (stride), (stride), out2, out3); \ | |
41 } | |
42 #define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__) | |
43 | |
44 #define LD_B8(RTYPE, psrc, stride, \ | |
45 out0, out1, out2, out3, out4, out5, out6, out7) \ | |
46 { /* out0..out3 from psrc, out4..out7 from psrc + 4 * stride; stride parenthesized so expression arguments scale correctly */ \ | |
47 LD_B4(RTYPE, (psrc), (stride), out0, out1, out2, out3); \ | |
48 LD_B4(RTYPE, (psrc) + 4 * (stride), (stride), out4, out5, out6, out7); \ | |
49 } | |
50 #define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__) | |
51 | |
52 /* Description : Store two vectors, each having 16 'byte' sized elements, | |
53 to two destinations that are 'stride' apart | |
54 Arguments : Inputs - in0, in1, pdst, stride (added to pdst before the RTYPE cast) | |
55 Details : Store 16 byte elements from 'in0' to (pdst) | |
56 Store 16 byte elements from 'in1' to (pdst + stride) | |
57 */ | |
58 #define ST_B2(RTYPE, in0, in1, pdst, stride) \ | |
59 { /* pdst is expanded once per store: pass side-effect-free arguments */ \ | |
60 ST_B(RTYPE, in0, (pdst)); \ | |
61 ST_B(RTYPE, in1, (pdst) + (stride)); \ | |
62 } | |
63 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) | |
64 | |
65 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ | |
66 { /* in0/in1 to pdst, in2/in3 to pdst + 2 * stride; stride parenthesized so expression arguments scale correctly */ \ | |
67 ST_B2(RTYPE, in0, in1, (pdst), (stride)); \ | |
68 ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride)); \ | |
69 } | |
70 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) | |
71 | |
72 #define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ | |
73 pdst, stride) \ | |
74 { /* in0..in3 to pdst, in4..in7 to pdst + 4 * stride; pdst and stride parenthesized at every use */ \ | |
75 ST_B4(RTYPE, in0, in1, in2, in3, (pdst), (stride)); \ | |
76 ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * (stride), (stride)); \ | |
77 } | |
78 #define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__) | |
79 | |
80 /* Description : Shuffle byte vector elements as per mask vector | |
81 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 | |
82 Outputs - out0, out1 | |
83 Return Type - as per RTYPE | |
84 Details : Byte elements from 'in0' & 'in1' are copied selectively to | |
85 'out0' as per 'mask0'; likewise 'in2' & 'in3' to 'out1' as per 'mask1' | |
86 */ | |
87 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ | |
88 { /* arguments are parenthesized so each (v16i8) cast covers the whole argument expression */ \ | |
89 out0 = (RTYPE) __msa_vshf_b((v16i8) (mask0), (v16i8) (in1), (v16i8) (in0)); \ | |
90 out1 = (RTYPE) __msa_vshf_b((v16i8) (mask1), (v16i8) (in3), (v16i8) (in2)); \ | |
91 } | |
92 #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) | |
93 | |
94 #endif /* __MACROS_MSA_H__ */ | |
OLD | NEW |