Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(284)

Side by Side Diff: source/row_msa.cc

Issue 2285683002: Add MIPS SIMD Arch (MSA) optimized MirrorRow function (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« include/libyuv/macros_msa.h ('K') | « libyuv_test.gyp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/row.h"
12 #include "libyuv/macros_msa.h"
fbarchard1 2016/08/31 01:20:42 This header won't compile on Intel/ARM as is. May
13
14 #ifdef __cplusplus
15 namespace libyuv {
16 extern "C" {
17 #endif
18
19 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa)
20
// MirrorRow_MSA copies `width` bytes from `src` to `dst` in reverse byte order:
// the source is walked from its end towards its start while `dst` is filled
// from its start.
//   src   - first byte of the source row (read only).
//   dst   - first byte of the destination row.
//   width - number of bytes to mirror; assumed to be >= 0, it is not checked
//           here (TODO confirm the caller guarantees this).
// The work is split into four stages: 128-byte vector blocks, an optional
// 16/32/64-byte vector remainder chosen by bits 4..6 of width, scalar groups
// of 4 bytes, and finally single bytes.
// NOTE(review): LD_UB*, ST_UB* and VSHF_B2_UB come from libyuv/macros_msa.h,
// which is not visible here. The notes on them below assume that loads and
// stores handle 16-byte vectors at the given stride in argument order, and
// that VSHF_B2_UB(a, a, b, b, m, m, o0, o1) shuffles a into o0 and b into o1.
// Confirm against that header.
21 void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width)
22 {
23 int32_t count;
24 uint8_t val0, val1, val2, val3;
25 v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
fbarchard1 2016/08/31 01:20:43 prefer uvec8 be defined as v16u8 in row.h if that
26 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
// Shuffle control holding the byte indices 15 down to 0.
// NOTE(review): with the same vector supplied as both shuffle inputs this
// looks intended to reverse the 16 bytes of that vector; confirm against the
// VSHF.B definition.
27 v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
28 int32_t cnt64, cnt32, cnt16;
fbarchard1 2016/08/31 01:20:43 prefer int32 defined in libyuv/basic_types.h or if
29
// Point src one past the last source byte; the rest of the function only
// moves src backwards.
30 src += width;
31
// Main loop: one iteration for every full 128-byte block (width >> 7).
32 for (count = 0; count < (width >> 7); count++)
33 {
34 src -= 128;
// Presumably src7 receives the lowest 16 bytes of the block and src0 the
// highest (argument order at a 16-byte stride); TODO confirm.
35 LD_UB8(src, 16, src7, src6, src5, src4, src3, src2, src1, src0);
// Each source vector is passed twice, so every dstN is derived only from srcN.
36 VSHF_B2_UB(src7, src7, src6, src6, mask, mask, dst7, dst6);
37 VSHF_B2_UB(src5, src5, src4, src4, mask, mask, dst5, dst4);
38 VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2);
39 VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0);
// dst0 (from the highest-address source vector) is listed first, so it
// presumably lands at the lowest destination address.
40 ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, 16);
41 dst += 128;
42 }
43
// Vector remainder: bits 4, 5 and 6 of width tell whether a 16-, 32- and/or
// 64-byte chunk is left after the 128-byte blocks.
44 if ((width >> 4) & 0x7)
fbarchard1 2016/08/31 01:20:42 its not necessary to hand odd widths in row functi
45 {
46 cnt64 = (width >> 6) & 0x1;
47 cnt32 = (width >> 5) & 0x1;
48 cnt16 = (width >> 4) & 0x1;
49
// The nested branches below enumerate every combination of the three flags.
// In each one the 16-byte chunk is consumed first, then the 32-byte chunk,
// then the 64-byte chunk; src steps back by the chunk size before each load
// and dst advances by the same amount after each store.
50 if (cnt16)
51 {
52 src -= 16;
53 src0 = LD_UB(src);
54
55 if (cnt32)
56 {
57 src -= 32;
58 LD_UB2(src, 16, src2, src1);
59
60 if (cnt64)
61 {
// Case 16 + 32 + 64 bytes.
62 src -= 64;
63 LD_UB4(src, 16, src6, src5, src4, src3);
64
65 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0,
66 (v16i8) src0);
67 VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst2, dst1);
68 VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst6, dst5);
69 VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst4, dst3);
70
71 ST_UB(dst0, dst);
72 dst += 16;
73 ST_UB2(dst1, dst2, dst, 16);
74 dst += 32;
75 ST_UB4(dst3, dst4, dst5, dst6, dst, 16);
76 dst += 64;
77 }
78 else
79 {
// Case 16 + 32 bytes.
80 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0,
81 (v16i8) src0);
82 VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst2, dst1);
83
84 ST_UB(dst0, dst);
85 dst += 16;
86 ST_UB2(dst1, dst2, dst, 16);
87 dst += 32;
88 }
89 }
90 else if (cnt64)
91 {
// Case 16 + 64 bytes. The outputs are numbered dst1..dst4 here, unlike the
// 16 + 32 + 64 case, which uses dst3..dst6 for the 64-byte chunk.
92 src -= 64;
93 LD_UB4(src, 16, src6, src5, src4, src3);
94
95 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0,
96 (v16i8) src0);
97 VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst4, dst3);
98 VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst2, dst1);
99
100 ST_UB(dst0, dst);
101 dst += 16;
102 ST_UB4(dst1, dst2, dst3, dst4, dst, 16);
103 dst += 64;
104 }
105 else
106 {
// Case 16 bytes only.
107 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0,
108 (v16i8) src0);
109
110 ST_UB(dst0, dst);
111 dst += 16;
112 }
113 }
114 else if (cnt32)
115 {
116 src -= 32;
117 LD_UB2(src, 16, src2, src1);
118
119 if (cnt64)
120 {
// Case 32 + 64 bytes (no 16-byte chunk), outputs numbered dst0..dst5.
121 src -= 64;
122 LD_UB4(src, 16, src6, src5, src4, src3);
123
124 VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst1, dst0);
125 VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst5, dst4);
126 VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst3, dst2);
127
128 ST_UB2(dst0, dst1, dst, 16);
129 dst += 32;
130 ST_UB4(dst2, dst3, dst4, dst5, dst, 16);
131 dst += 64;
132 }
133 else
134 {
// Case 32 bytes only.
135 VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst1, dst0);
136
137 ST_UB2(dst0, dst1, dst, 16);
138 dst += 32;
139 }
140 }
141 else if (cnt64)
142 {
// Case 64 bytes only.
143 src -= 64;
144 LD_UB4(src, 16, src6, src5, src4, src3);
145
146 VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst3, dst2);
147 VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst1, dst0);
148
149 ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
150 dst += 64;
151 }
152 }
153
// Scalar remainder in groups of four: (width >> 2) & 0x3 gives 0..3 groups.
// Each group is read from src[3] down to src[0] and written to dst[0]..dst[3].
154 for (count = 0; count < ((width >> 2) & 0x3); count++)
155 {
156 src -= 4;
157 val0 = src[3];
158 val1 = src[2];
159 val2 = src[1];
160 val3 = src[0];
161 dst[0] = val0;
162 dst[1] = val1;
163 dst[2] = val2;
164 dst[3] = val3;
165 dst += 4;
166 }
167
// Last 0..3 bytes (width & 0x3): copy the byte just below src, then step src
// back and dst forward by one.
168 for (count = 0; count < (width & 0x3); count++)
169 {
170 val0 = src[-1];
171 src--;
172 dst[0] = val0;
173 dst++;
174 }
175 }
176 #endif // !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa)
177
178 #ifdef __cplusplus
179 } // extern "C"
180 } // namespace libyuv
181 #endif
OLDNEW
« include/libyuv/macros_msa.h ('K') | « libyuv_test.gyp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698