Chromium Code Reviews

Side by Side Diff: include/libyuv/cpu/mips/macros_msa.h

Issue 2285683002: Add MIPS SIMD Arch (MSA) optimized MirrorRow function (Closed)
Patch Set: Changes as per review comments. Created 4 years, 3 months ago.
1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #ifndef __MACROS_MSA_H__
12 #define __MACROS_MSA_H__
fbarchard1 2016/09/14 01:48:04 Could you add: #if !defined(LIBYUV_DISABLE_MSA) &
manojkumar.bhosale 2016/09/14 12:45:29 Done.
13
14 #include <stdint.h>
15 #include <msa.h>
16
17 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc))
18 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
19
20 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in)
21 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
22
23 /* Description : Load two vectors with 16 'byte' sized elements
24 Arguments : Inputs - psrc, stride
25 Outputs - out0, out1
26 Return Type - as per RTYPE
27 Details : Load 16 byte elements in 'out0' from (psrc)
28 Load 16 byte elements in 'out1' from (psrc + stride)
29 */
30 #define LD_B2(RTYPE, psrc, stride, out0, out1) \
31 { \
32 out0 = LD_B(RTYPE, (psrc)); \
33 out1 = LD_B(RTYPE, (psrc) + stride); \
34 }
35 #define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
36 #define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)
37
38 #define LD_B3(RTYPE, psrc, stride, out0, out1, out2) \
39 { \
40 LD_B2(RTYPE, (psrc), stride, out0, out1); \
41 out2 = LD_B(RTYPE, (psrc) + 2 * stride); \
42 }
43 #define LD_UB3(...) LD_B3(v16u8, __VA_ARGS__)
44 #define LD_SB3(...) LD_B3(v16i8, __VA_ARGS__)
45
46 #define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
47 { \
48 LD_B2(RTYPE, (psrc), stride, out0, out1); \
49 LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
50 }
51 #define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
52 #define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)
53
54 #define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \
55 { \
56 LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
57 out4 = LD_B(RTYPE, (psrc) + 4 * stride); \
58 }
59 #define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__)
60 #define LD_SB5(...) LD_B5(v16i8, __VA_ARGS__)
61
62 #define LD_B6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \
63 { \
64 LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
65 LD_B2(RTYPE, (psrc) + 4 * stride, stride, out4, out5); \
66 }
67 #define LD_UB6(...) LD_B6(v16u8, __VA_ARGS__)
68 #define LD_SB6(...) LD_B6(v16i8, __VA_ARGS__)
69
70 #define LD_B7(RTYPE, psrc, stride, \
71 out0, out1, out2, out3, out4, out5, out6) \
72 { \
73 LD_B5(RTYPE, (psrc), stride, out0, out1, out2, out3, out4); \
74 LD_B2(RTYPE, (psrc) + 5 * stride, stride, out5, out6); \
75 }
76 #define LD_UB7(...) LD_B7(v16u8, __VA_ARGS__)
77 #define LD_SB7(...) LD_B7(v16i8, __VA_ARGS__)
78
79 #define LD_B8(RTYPE, psrc, stride, \
80 out0, out1, out2, out3, out4, out5, out6, out7) \
81 { \
82 LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
83 LD_B4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \
84 }
85 #define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__)
86
87 /* Description : Store two vectors with stride each having 16 'byte' sized
88 elements
89 Arguments : Inputs - in0, in1, pdst, stride
90 Details : Store 16 byte elements from 'in0' to (pdst)
91 Store 16 byte elements from 'in1' to (pdst + stride)
92 */
93 #define ST_B2(RTYPE, in0, in1, pdst, stride) \
94 { \
95 ST_B(RTYPE, in0, (pdst)); \
96 ST_B(RTYPE, in1, (pdst) + stride); \
97 }
98 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
99 #define ST_SB2(...) ST_B2(v16i8, __VA_ARGS__)
100
101 #define ST_B3(RTYPE, in0, in1, in2, pdst, stride) \
102 { \
103 ST_B2(RTYPE, in0, in1, (pdst), stride); \
104 ST_B(RTYPE, in2, (pdst) + 2 * stride); \
105 }
106 #define ST_UB3(...) ST_B3(v16u8, __VA_ARGS__)
107
108 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
109 { \
110 ST_B2(RTYPE, in0, in1, (pdst), stride); \
111 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
112 }
113 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
114 #define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__)
115
116 #define ST_B5(RTYPE, in0, in1, in2, in3, in4, pdst, stride) \
117 { \
118 ST_B2(RTYPE, in0, in1, (pdst), stride); \
119 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
120 ST_B(RTYPE, in4, (pdst) + 4 * stride); \
121 }
122 #define ST_UB5(...) ST_B5(v16u8, __VA_ARGS__)
123
124 #define ST_B6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \
125 { \
126 ST_B2(RTYPE, in0, in1, (pdst), stride); \
127 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
128 ST_B2(RTYPE, in4, in5, (pdst) + 4 * stride, stride); \
129 }
130 #define ST_UB6(...) ST_B6(v16u8, __VA_ARGS__)
131
132 #define ST_B7(RTYPE, in0, in1, in2, in3, in4, in5, in6, pdst, stride) \
133 { \
134 ST_B2(RTYPE, in0, in1, (pdst), stride); \
135 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
136 ST_B2(RTYPE, in4, in5, (pdst) + 4 * stride, stride); \
137 ST_B(RTYPE, in6, (pdst) + 6 * stride); \
138 }
139 #define ST_UB7(...) ST_B7(v16u8, __VA_ARGS__)
140
141 #define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
142 pdst, stride) \
143 { \
144 ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride); \
145 ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \
146 }
147 #define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__)
148
149 /* Description : Shuffle byte vector elements as per mask vector
150 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
151 Outputs - out0, out1
152 Return Type - as per RTYPE
153 Details : Byte elements from 'in0' & 'in1' are copied selectively to
154 'out0' as per control vector 'mask0'
155 */
156 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
157 { \
158 out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \
159 out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \
160 }
161 #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
162 #endif /* __MACROS_MSA_H__ */
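
The LD_B* family above is plain strided loading: each output receives one 16-byte vector and the source pointer advances by `stride` between them. A minimal sketch of pulling two adjacent rows of an 8-bit plane into registers (the helper name and signature are hypothetical, purely for illustration; the macros and MSA vector types from the header above are assumed to be in scope):

static void load_two_rows_sketch(const uint8_t* src, int src_stride,
                                 v16u8* row0, v16u8* row1) {
  v16u8 r0, r1;
  /* r0 = 16 bytes at src, r1 = 16 bytes at src + src_stride. */
  LD_UB2(src, src_stride, r0, r1);
  *row0 = r0;
  *row1 = r1;
}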
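The ST_B* family is the store-side counterpart. Combined with the loads, a straight 64-bytes-per-iteration row copy might look like the sketch below (hypothetical helper, not part of this patch; it assumes `width` is a multiple of 64):

static void copy_row_64_sketch(const uint8_t* src, uint8_t* dst, int width) {
  int x;
  v16u8 v0, v1, v2, v3;
  for (x = 0; x < width; x += 64) {
    /* A stride of 16 walks consecutive vectors within the row, not rows. */
    LD_UB4(src + x, 16, v0, v1, v2, v3);
    ST_UB4(v0, v1, v2, v3, dst + x, 16);
  }
}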
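VSHF_B2 wraps the MSA vshf.b intrinsic: each byte of the control mask selects a byte from the concatenation of the two source vectors, so a descending 15..0 mask reverses a single vector. That reversal is the core of the MirrorRow optimization this issue adds. A rough sketch of the idea follows; it is not the exact code from this patch and again assumes `width` is a multiple of 64:

static void mirror_row_sketch(const uint8_t* src, uint8_t* dst, int width) {
  int x;
  v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
  /* Control mask that reverses the 16 bytes of one vector. */
  v16i8 shuffler = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
  src += width - 64;  /* Start at the last 64-byte block of the row. */
  for (x = 0; x < width; x += 64) {
    LD_UB4(src, 16, src3, src2, src1, src0);
    /* Reverse each vector; passing the same vector for both sources keeps
       the byte selection within one register. */
    VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2);
    VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0);
    ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
    dst += 64;
    src -= 64;
  }
}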