Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1147)

Side by Side Diff: source/row_msa.cc

Issue 2285683002: Add MIPS SIMD Arch (MSA) optimized MirrorRow function (Closed)
Patch Set: Changes as per review comments Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« source/cpu_id.cc ('K') | « source/row_any.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/row.h"
12
13 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa)
14 #include "libyuv/cpu/mips/macros_msa.h"
15 #endif
16
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21
22 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa)
// MirrorRow_MSA: copies a row from src to dst in mirrored (reversed) order,
// working in 16-byte vector units from the end of the source row backwards.
//   src   - start of the source row (read only).
//   dst   - start of the destination row; advanced forward after each store.
//   width - row length in bytes. Only bits 0x10 and above are examined below,
//           so a remainder of (width & 15) bytes is not processed here.
//           NOTE(review): presumably callers pass a multiple of 16 or handle
//           the remainder elsewhere - confirm against the caller.
// NOTE(review): LD_UB*/ST_UB*/VSHF_B2_UB are macros from macros_msa.h and are
// not visible here. The comments below assume LD_UBn(p, 16, a, b, ...) fills
// its outputs in ascending address order with a 16-byte stride, ST_UBn stores
// its inputs the same way, and VSHF_B2_UB performs two byte shuffles - confirm.
23 void MirrorRow_MSA(const uint8* src, uint8* dst, int width)
24 {
fbarchard1 2016/09/14 01:48:05 { should be on same row as void MirrorRow_MSA()
manojkumar.bhosale 2016/09/14 12:45:30 Done.
25 int count;
26 v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
27 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
// Shuffle control listing byte indices 15 down to 0; presumably this reverses
// the 16 bytes of a vector when both shuffle inputs are the same register.
28 v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
29
// Start one past the last source byte; every step below moves src backwards
// before loading.
30 src += width;
31
// Main loop: one iteration per full 128 bytes of width (width >> 7).
32 for (count = 0; count < (width >> 7); count++)
33 {
fbarchard1 2016/09/14 01:48:05 { should be on same line as for ()
manojkumar.bhosale 2016/09/14 12:45:30 Done.
34 src -= 128;
// Outputs are listed src7..src0, so src0 receives the last-listed (presumably
// highest-address) vector and ends up first in the destination.
35 LD_UB8(src, 16, src7, src6, src5, src4, src3, src2, src1, src0);
36
// Each dstN is produced from the matching srcN shuffled with mask.
37 VSHF_B2_UB(src7, src7, src6, src6, mask, mask, dst7, dst6);
fbarchard1 2016/09/14 01:48:04 I wouldnt normally unroll this much. 2 or 4 vecto
manojkumar.bhosale 2016/09/14 12:45:30 Done with 4 vectors
38 VSHF_B2_UB(src5, src5, src4, src4, mask, mask, dst5, dst4);
39 VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2);
40 VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0);
41
42 ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, 16);
43 dst += 128;
44 }
45
// Tail: bits 0x40, 0x20 and 0x10 of width select how many 16-byte vectors
// (1 to 7) remain after the 128-byte loop. Each branch mirrors the loop body
// for that vector count.
// Bit 0x40 set: 4 to 7 vectors remain.
46 if (width & 0x40)
fbarchard1 2016/09/14 01:48:04 suggest removing everything from here down to make
manojkumar.bhosale 2016/09/14 12:45:30 Done.
47 {
48 if (width & 0x20)
49 {
50 if (width & 0x10)
51 {
// 0x40 | 0x20 | 0x10: 7 vectors (112 bytes).
52 src -= 112;
53 LD_UB7(src, 16, src6, src5, src4, src3, src2, src1, src0);
54
55 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
56 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3);
57 VSHF_B2_UB(src4, src4, src5, src5, mask, mask, dst4, dst5);
// Odd vector out: shuffled with the intrinsic directly instead of the
// two-at-a-time macro.
58 dst6 = (v16u8) __msa_vshf_b(mask, (v16i8) src6, (v16i8) src6);
59
60 ST_UB7(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst, 16);
61 dst += 112;
62 }
63 else
64 {
// 0x40 | 0x20: 6 vectors (96 bytes).
65 src -= 96;
66 LD_UB6(src, 16, src5, src4, src3, src2, src1, src0);
67
68 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
69 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3);
70 VSHF_B2_UB(src4, src4, src5, src5, mask, mask, dst4, dst5);
71
72 ST_UB6(dst0, dst1, dst2, dst3, dst4, dst5, dst, 16);
73 dst += 96;
74 }
75 }
76 else if (width & 0x10)
77 {
// 0x40 | 0x10: 5 vectors (80 bytes).
78 src -= 80;
79 LD_UB5(src, 16, src4, src3, src2, src1, src0);
80
81 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
82 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3);
83 dst4 = (v16u8) __msa_vshf_b(mask, (v16i8) src4, (v16i8) src4);
84
85 ST_UB5(dst0, dst1, dst2, dst3, dst4, dst, 16);
86 dst += 80;
87 }
88 else
89 {
// 0x40 only: 4 vectors (64 bytes).
90 src -= 64;
91 LD_UB4(src, 16, src3, src2, src1, src0);
92
93 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
94 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3);
95
96 ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
97 dst += 64;
98 }
99 }
// Bit 0x40 clear, bit 0x20 set: 2 or 3 vectors remain.
100 else if (width & 0x20)
101 {
102 if (width & 0x10)
103 {
// 0x20 | 0x10: 3 vectors (48 bytes).
104 src -= 48;
105 LD_UB3(src, 16, src2, src1, src0);
106
107 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
108 dst2 = (v16u8) __msa_vshf_b(mask, (v16i8) src2, (v16i8) src2);
109
110 ST_UB3(dst0, dst1, dst2, dst, 16);
111 dst += 48;
112 }
113 else
114 {
// 0x20 only: 2 vectors (32 bytes).
115 src -= 32;
116 LD_UB2(src, 16, src1, src0);
117
118 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
119
120 ST_UB2(dst0, dst1, dst, 16);
121 dst += 32;
122 }
123 }
// Only bit 0x10 set: a single vector (16 bytes) remains.
124 else if (width & 0x10)
125 {
126 src -= 16;
127 src0 = LD_UB(src);
128
129 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, (v16i8) src0);
130 ST_UB(dst0, dst);
131 dst += 16;
132 }
// No branch handles the low four bits of width; see the width note above.
133 }
134 #endif // !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa)
135
136 #ifdef __cplusplus
137 } // extern "C"
138 } // namespace libyuv
139 #endif
OLDNEW
« source/cpu_id.cc ('K') | « source/row_any.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698