OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 35 matching lines...) | |
46 for (x = 0; x < width; x += 16) { | 46 for (x = 0; x < width; x += 16) { |
47 LD_UB4(src, 16, src3, src2, src1, src0); | 47 LD_UB4(src, 16, src3, src2, src1, src0); |
48 VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); | 48 VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); |
49 VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0); | 49 VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0); |
50 ST_UB4(dst0, dst1, dst2, dst3, dst, 16); | 50 ST_UB4(dst0, dst1, dst2, dst3, dst, 16); |
51 dst += 64; | 51 dst += 64; |
52 src -= 64; | 52 src -= 64; |
53 } | 53 } |
54 } | 54 } |
55 | 55 |
56 void I422ToYUY2Row_MSA(const uint8* src_y, | |
57 const uint8* src_u, | |
58 const uint8* src_v, | |
59 uint8* dst_yuy2, | |
60 int width) { | |
61 int x; | |
62 v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; | |
63 v16u8 dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3; | |
64 | |
65 for (x = 0; x < width; x += 32) { | |
66 src_u0 = LD_UB(src_u); | |
67 src_v0 = LD_UB(src_v); | |
68 LD_UB2(src_y, 16, src_y0, src_y1); | |
69 ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1); | |
fbarchard1 2016/09/29 17:10:43
Did you try just 1 vector at a time instead of 2?
manojkumar.bhosale 2016/09/30 09:00:29
all the loop unrolling we have done is based on lo
(A sketch of the single-vector variant discussed here follows the diff below.)
70 ILVRL_B2_UB(vec_uv0, src_y0, dst_yuy2_0, dst_yuy2_1); | |
71 ILVRL_B2_UB(vec_uv1, src_y1, dst_yuy2_2, dst_yuy2_3); | |
72 ST_UB4(dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3, dst_yuy2, 16); | |
73 src_u += 16; | |
74 src_v += 16; | |
75 src_y += 32; | |
76 dst_yuy2 += 64; | |
77 } | |
78 } | |
79 | |
80 void I422ToUYVYRow_MSA(const uint8* src_y, | |
81 const uint8* src_u, | |
82 const uint8* src_v, | |
83 uint8* dst_uyvy, | |
84 int width) { | |
85 int x; | |
86 v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; | |
87 v16u8 dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3; | |
88 | |
89 for (x = 0; x < width; x += 32) { | |
90 src_u0 = LD_UB(src_u); | |
91 src_v0 = LD_UB(src_v); | |
92 LD_UB2(src_y, 16, src_y0, src_y1); | |
93 ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1); | |
94 ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1); | |
95 ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3); | |
96 ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16); | |
97 src_u += 16; | |
98 src_v += 16; | |
99 src_y += 32; | |
100 dst_uyvy += 64; | |
101 } | |
102 } | |
103 | |
56 #ifdef __cplusplus | 104 #ifdef __cplusplus |
57 } // extern "C" | 105 } // extern "C" |
58 } // namespace libyuv | 106 } // namespace libyuv |
59 #endif | 107 #endif |
60 | 108 |
61 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 109 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
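For reference, here is a minimal sketch of the single-vector variant fbarchard1 asked about: 16 output pixels per iteration instead of 32. This is not the committed code; the function name I422ToYUY2Row_MSA_1x is hypothetical, and it assumes the same LD_UB / ILVRL_B2_UB / ST_UB helpers from libyuv's MSA macro header and the __msa_ilvr_b builtin are in scope, as they are in row_msa.cc.

// Hypothetical single-vector variant (sketch only, not part of this CL).
// Assumes libyuv's MSA helpers (LD_UB, ILVRL_B2_UB, ST_UB) are available.
void I422ToYUY2Row_MSA_1x(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          uint8* dst_yuy2,
                          int width) {
  int x;
  v16u8 src_u0, src_v0, src_y0, vec_uv0, dst0, dst1;

  for (x = 0; x < width; x += 16) {
    src_u0 = LD_UB(src_u);  // 8 U samples used (low half of the vector)
    src_v0 = LD_UB(src_v);  // 8 V samples used (low half of the vector)
    src_y0 = LD_UB(src_y);  // 16 Y samples
    // Interleave U and V bytes: U0 V0 U1 V1 ...
    vec_uv0 = (v16u8)__msa_ilvr_b((v16i8)src_v0, (v16i8)src_u0);
    // Interleave Y with UV to produce YUY2: Y0 U0 Y1 V0 ...
    ILVRL_B2_UB(vec_uv0, src_y0, dst0, dst1);
    ST_UB(dst0, dst_yuy2);
    ST_UB(dst1, dst_yuy2 + 16);
    src_u += 8;
    src_v += 8;
    src_y += 16;
    dst_yuy2 += 32;  // 16 pixels * 2 bytes per YUY2 pixel
  }
}

The committed version above instead processes 32 pixels per iteration, trading a little register pressure for fewer loop iterations and stores per row.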