OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 #include "libyuv/row.h" | |
12 | |
13 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) | |
14 #include "libyuv/cpu/mips/macros_msa.h" | |
15 #endif | |
16 | |
17 #ifdef __cplusplus | |
18 namespace libyuv { | |
19 extern "C" { | |
20 #endif | |
21 | |
22 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) | |
23 void MirrorRow_MSA(const uint8* src, uint8* dst, int width) | |
24 { | |
fbarchard1
2016/09/14 01:48:05
{ should be on same row as void MirrorRow_MSA()
manojkumar.bhosale
2016/09/14 12:45:30
Done.
| |
25 int count; | |
26 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; | |
27 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; | |
28 v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; | |
29 | |
30 src += width; | |
31 | |
32 for (count = 0; count < (width >> 7); count++) | |
33 { | |
fbarchard1
2016/09/14 01:48:05
{ should be on same line as for ()
manojkumar.bhosale
2016/09/14 12:45:30
Done.
| |
34 src -= 128; | |
35 LD_UB8(src, 16, src7, src6, src5, src4, src3, src2, src1, src0); | |
36 | |
37 VSHF_B2_UB(src7, src7, src6, src6, mask, mask, dst7, dst6); | |
fbarchard1
2016/09/14 01:48:04
I wouldnt normally unroll this much. 2 or 4 vecto
manojkumar.bhosale
2016/09/14 12:45:30
Done with 4 vectors
| |
38 VSHF_B2_UB(src5, src5, src4, src4, mask, mask, dst5, dst4); | |
39 VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2); | |
40 VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0); | |
41 | |
42 ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, 16); | |
43 dst += 128; | |
44 } | |
45 | |
46 if (width & 0x40) | |
fbarchard1
2016/09/14 01:48:04
suggest removing everything from here down to make
manojkumar.bhosale
2016/09/14 12:45:30
Done.
| |
47 { | |
48 if (width & 0x20) | |
49 { | |
50 if (width & 0x10) | |
51 { | |
52 src -= 112; | |
53 LD_UB7(src, 16, src6, src5, src4, src3, src2, src1, src0); | |
54 | |
55 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
56 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); | |
57 VSHF_B2_UB(src4, src4, src5, src5, mask, mask, dst4, dst5); | |
58 dst6 = (v16u8) __msa_vshf_b(mask, (v16i8) src6, (v16i8) src6); | |
59 | |
60 ST_UB7(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst, 16); | |
61 dst += 112; | |
62 } | |
63 else | |
64 { | |
65 src -= 96; | |
66 LD_UB6(src, 16, src5, src4, src3, src2, src1, src0); | |
67 | |
68 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
69 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); | |
70 VSHF_B2_UB(src4, src4, src5, src5, mask, mask, dst4, dst5); | |
71 | |
72 ST_UB6(dst0, dst1, dst2, dst3, dst4, dst5, dst, 16); | |
73 dst += 96; | |
74 } | |
75 } | |
76 else if (width & 0x10) | |
77 { | |
78 src -= 80; | |
79 LD_UB5(src, 16, src4, src3, src2, src1, src0); | |
80 | |
81 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
82 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); | |
83 dst4 = (v16u8) __msa_vshf_b(mask, (v16i8) src4, (v16i8) src4); | |
84 | |
85 ST_UB5(dst0, dst1, dst2, dst3, dst4, dst, 16); | |
86 dst += 80; | |
87 } | |
88 else | |
89 { | |
90 src -= 64; | |
91 LD_UB4(src, 16, src3, src2, src1, src0); | |
92 | |
93 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
94 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); | |
95 | |
96 ST_UB4(dst0, dst1, dst2, dst3, dst, 16); | |
97 dst += 64; | |
98 } | |
99 } | |
100 else if (width & 0x20) | |
101 { | |
102 if (width & 0x10) | |
103 { | |
104 src -= 48; | |
105 LD_UB3(src, 16, src2, src1, src0); | |
106 | |
107 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
108 dst2 = (v16u8) __msa_vshf_b(mask, (v16i8) src2, (v16i8) src2); | |
109 | |
110 ST_UB3(dst0, dst1, dst2, dst, 16); | |
111 dst += 48; | |
112 } | |
113 else | |
114 { | |
115 src -= 32; | |
116 LD_UB2(src, 16, src1, src0); | |
117 | |
118 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
119 | |
120 ST_UB2(dst0, dst1, dst, 16); | |
121 dst += 32; | |
122 } | |
123 } | |
124 else if (width & 0x10) | |
125 { | |
126 src -= 16; | |
127 src0 = LD_UB(src); | |
128 | |
129 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, (v16i8) src0); | |
130 ST_UB(dst0, dst); | |
131 dst += 16; | |
132 } | |
133 } | |
134 #endif // !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) | |
135 | |
136 #ifdef __cplusplus | |
137 } // extern "C" | |
138 } // namespace libyuv | |
139 #endif | |
OLD | NEW |