OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #ifndef __MACROS_MSA_H__ | 11 #ifndef INCLUDE_LIBYUV_MACROS_MSA_H_ // NOLINT |
12 #define __MACROS_MSA_H__ | 12 #define INCLUDE_LIBYUV_MACROS_MSA_H_ |
13 | 13 |
14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
15 #include <stdint.h> | 15 #include <stdint.h> |
16 #include <msa.h> | 16 #include <msa.h> |
17 | 17 |
18 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) | 18 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) |
19 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) | 19 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) |
20 | 20 |
21 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) | 21 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) |
22 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) | 22 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) |
23 | 23 |
24 /* Description : Load two vectors with 16 'byte' sized elements | 24 /* Description : Load two vectors with 16 'byte' sized elements |
25 Arguments : Inputs - psrc, stride | 25 Arguments : Inputs - psrc, stride |
26 Outputs - out0, out1 | 26 Outputs - out0, out1 |
27 Return Type - as per RTYPE | 27 Return Type - as per RTYPE |
28 Details : Load 16 byte elements in 'out0' from (psrc) | 28 Details : Load 16 byte elements in 'out0' from (psrc) |
29 Load 16 byte elements in 'out1' from (psrc + stride) | 29 Load 16 byte elements in 'out1' from (psrc + stride) |
30 */ | 30 */ |
31 #define LD_B2(RTYPE, psrc, stride, out0, out1) { \ | 31 #define LD_B2(RTYPE, psrc, stride, out0, out1) { \ |
32 out0 = LD_B(RTYPE, (psrc)); \ | 32 out0 = LD_B(RTYPE, (psrc)); \ |
33 out1 = LD_B(RTYPE, (psrc) + stride); \ | 33 out1 = LD_B(RTYPE, (psrc) + stride); \ |
34 } | 34 } |
35 #define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__) | 35 #define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__) |
36 #define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__) | |
37 | 36 |
38 #define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \ | 37 #define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \ |
39 LD_B2(RTYPE, (psrc), stride, out0, out1); \ | 38 LD_B2(RTYPE, (psrc), stride, out0, out1); \ |
40 LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \ | 39 LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \ |
41 } | 40 } |
42 #define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__) | 41 #define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__) |
43 #define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__) | |
44 | 42 |
45 /* Description : Store two vectors with stride each having 16 'byte' sized | 43 /* Description : Store two vectors with stride each having 16 'byte' sized |
46 elements | 44 elements |
47 Arguments : Inputs - in0, in1, pdst, stride | 45 Arguments : Inputs - in0, in1, pdst, stride |
48 Details : Store 16 byte elements from 'in0' to (pdst) | 46 Details : Store 16 byte elements from 'in0' to (pdst) |
49 Store 16 byte elements from 'in1' to (pdst + stride) | 47 Store 16 byte elements from 'in1' to (pdst + stride) |
50 */ | 48 */ |
51 #define ST_B2(RTYPE, in0, in1, pdst, stride) { \ | 49 #define ST_B2(RTYPE, in0, in1, pdst, stride) { \ |
52 ST_B(RTYPE, in0, (pdst)); \ | 50 ST_B(RTYPE, in0, (pdst)); \ |
53 ST_B(RTYPE, in1, (pdst) + stride); \ | 51 ST_B(RTYPE, in1, (pdst) + stride); \ |
54 } | 52 } |
55 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) | 53 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) |
56 #define ST_SB2(...) ST_B2(v16i8, __VA_ARGS__) | 54 # |
57 | |
58 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \ | 55 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \ |
59 ST_B2(RTYPE, in0, in1, (pdst), stride); \ | 56 ST_B2(RTYPE, in0, in1, (pdst), stride); \ |
60 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ | 57 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ |
61 } | 58 } |
62 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) | 59 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) |
63 #define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__) | 60 # |
64 | |
65 /* Description : Shuffle byte vector elements as per mask vector | 61 /* Description : Shuffle byte vector elements as per mask vector |
66 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 | 62 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 |
67 Outputs - out0, out1 | 63 Outputs - out0, out1 |
68 Return Type - as per RTYPE | 64 Return Type - as per RTYPE |
69 Details : Byte elements from 'in0' & 'in1' are copied selectively to | 65 Details : Byte elements from 'in0' & 'in1' are copied selectively to |
70 'out0' as per control vector 'mask0' | 66 'out0' as per control vector 'mask0' |
71 */ | 67 */ |
72 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \ | 68 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \ |
73 out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \ | 69 out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \ |
74 out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \ | 70 out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \ |
75 } | 71 } |
76 #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) | 72 #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) |
77 | 73 |
78 #define VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \ | 74 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ |
79 out0 = (RTYPE) __msa_vshf_w((v4i32) mask0, (v4i32) in1, (v4i32) in0); \ | |
80 out1 = (RTYPE) __msa_vshf_w((v4i32) mask1, (v4i32) in3, (v4i32) in2); \ | |
81 } | |
82 #define VSHF_W2_UB(...) VSHF_W2(v16u8, __VA_ARGS__) | |
83 #define VSHF_W2_SB(...) VSHF_W2(v16i8, __VA_ARGS__) | |
84 #define VSHF_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ | |
85 mask0, mask1, mask2, mask3, \ | |
86 out0, out1, out2, out3) { \ | |
87 VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ | |
88 VSHF_W2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3) \ | |
89 } | |
90 #define VSHF_W4_UB(...) VSHF_W4(v16u8, __VA_ARGS__) | |
91 | 75 |
92 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ | 76 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ NOLINT |
93 #endif /* __MACROS_MSA_H__ */ | |
OLD | NEW |