Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 | |
| 11 #include "libyuv/row.h" | |
| 12 | |
| 13 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) | |
| 14 #include "libyuv/cpu/mips/macros_msa.h" | |
| 15 #endif | |
| 16 | |
| 17 #ifdef __cplusplus | |
| 18 namespace libyuv { | |
| 19 extern "C" { | |
| 20 #endif | |
| 21 | |
| 22 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) | |
| 23 void MirrorRow_MSA(const uint8* src, uint8* dst, int width) /* Appears to write the bytes of src to dst in reverse order, one 16-byte vector at a time. No branch below handles width & 15. LD_/VSHF_/ST_ helpers come from macros_msa.h (not shown here). */ | |
| 24 { | |
|
fbarchard1
2016/09/14 01:48:05
{ should be on same row as void MirrorRow_MSA()
manojkumar.bhosale
2016/09/14 12:45:30
Done.
| |
| 25 int count; | |
| 26 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; | |
| 27 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; | |
| 28 v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* byte indices 15..0; presumably a shuffle control that reverses the 16 bytes of one vector - TODO confirm against the VSHF.B definition */ | |
| 29 | |
| 30 src += width; /* move src forward by width bytes; every later access first steps src backwards from here */ | |
| 31 | |
| 32 for (count = 0; count < (width >> 7); count++) /* one iteration per full 128-byte group */ | |
| 33 { | |
|
fbarchard1
2016/09/14 01:48:05
{ should be on same line as for ()
manojkumar.bhosale
2016/09/14 12:45:30
Done.
| |
| 34 src -= 128; | |
| 35 LD_UB8(src, 16, src7, src6, src5, src4, src3, src2, src1, src0); /* presumably loads 8 vectors at a 16-byte stride; outputs are listed src7..src0, so src0 would hold the highest-address chunk - verify in macros_msa.h */ | |
| 36 | |
| 37 VSHF_B2_UB(src7, src7, src6, src6, mask, mask, dst7, dst6); /* presumably two shuffles per call: srcN shuffled with itself under mask gives dstN */ | |
|
fbarchard1
2016/09/14 01:48:04
I wouldnt normally unroll this much. 2 or 4 vecto
manojkumar.bhosale
2016/09/14 12:45:30
Done with 4 vectors
| |
| 38 VSHF_B2_UB(src5, src5, src4, src4, mask, mask, dst5, dst4); | |
| 39 VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2); | |
| 40 VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0); | |
| 41 | |
| 42 ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, 16); /* presumably stores dst0..dst7 in that order at a 16-byte stride, so the chunk read last in memory is written first */ | |
| 43 dst += 128; | |
| 44 } | |
| 45 | |
| 46 if (width & 0x40) /* tail: bits 0x40, 0x20 and 0x10 of width select one of seven sizes, 112 down to 16 bytes */ | |
|
fbarchard1
2016/09/14 01:48:04
suggest removing everything from here down to make
manojkumar.bhosale
2016/09/14 12:45:30
Done.
| |
| 47 { | |
| 48 if (width & 0x20) | |
| 49 { | |
| 50 if (width & 0x10) | |
| 51 { | |
| 52 src -= 112; /* 0x40 + 0x20 + 0x10 set: 7 vectors */ | |
| 53 LD_UB7(src, 16, src6, src5, src4, src3, src2, src1, src0); | |
| 54 | |
| 55 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
| 56 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); | |
| 57 VSHF_B2_UB(src4, src4, src5, src5, mask, mask, dst4, dst5); | |
| 58 dst6 = (v16u8) __msa_vshf_b(mask, (v16i8) src6, (v16i8) src6); /* odd vector count: the last vector is shuffled with the intrinsic directly instead of the two-at-a-time macro */ | |
| 59 | |
| 60 ST_UB7(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst, 16); | |
| 61 dst += 112; | |
| 62 } | |
| 63 else | |
| 64 { | |
| 65 src -= 96; /* 0x40 + 0x20 set: 6 vectors */ | |
| 66 LD_UB6(src, 16, src5, src4, src3, src2, src1, src0); | |
| 67 | |
| 68 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
| 69 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); | |
| 70 VSHF_B2_UB(src4, src4, src5, src5, mask, mask, dst4, dst5); | |
| 71 | |
| 72 ST_UB6(dst0, dst1, dst2, dst3, dst4, dst5, dst, 16); | |
| 73 dst += 96; | |
| 74 } | |
| 75 } | |
| 76 else if (width & 0x10) | |
| 77 { | |
| 78 src -= 80; /* 0x40 + 0x10 set: 5 vectors */ | |
| 79 LD_UB5(src, 16, src4, src3, src2, src1, src0); | |
| 80 | |
| 81 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
| 82 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); | |
| 83 dst4 = (v16u8) __msa_vshf_b(mask, (v16i8) src4, (v16i8) src4); | |
| 84 | |
| 85 ST_UB5(dst0, dst1, dst2, dst3, dst4, dst, 16); | |
| 86 dst += 80; | |
| 87 } | |
| 88 else | |
| 89 { | |
| 90 src -= 64; /* only 0x40 set: 4 vectors */ | |
| 91 LD_UB4(src, 16, src3, src2, src1, src0); | |
| 92 | |
| 93 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
| 94 VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); | |
| 95 | |
| 96 ST_UB4(dst0, dst1, dst2, dst3, dst, 16); | |
| 97 dst += 64; | |
| 98 } | |
| 99 } | |
| 100 else if (width & 0x20) | |
| 101 { | |
| 102 if (width & 0x10) | |
| 103 { | |
| 104 src -= 48; /* 0x20 + 0x10 set: 3 vectors */ | |
| 105 LD_UB3(src, 16, src2, src1, src0); | |
| 106 | |
| 107 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
| 108 dst2 = (v16u8) __msa_vshf_b(mask, (v16i8) src2, (v16i8) src2); | |
| 109 | |
| 110 ST_UB3(dst0, dst1, dst2, dst, 16); | |
| 111 dst += 48; | |
| 112 } | |
| 113 else | |
| 114 { | |
| 115 src -= 32; /* only 0x20 set: 2 vectors */ | |
| 116 LD_UB2(src, 16, src1, src0); | |
| 117 | |
| 118 VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); | |
| 119 | |
| 120 ST_UB2(dst0, dst1, dst, 16); | |
| 121 dst += 32; | |
| 122 } | |
| 123 } | |
| 124 else if (width & 0x10) | |
| 125 { | |
| 126 src -= 16; /* only 0x10 set: a single vector */ | |
| 127 src0 = LD_UB(src); | |
| 128 | |
| 129 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, (v16i8) src0); | |
| 130 ST_UB(dst0, dst); | |
| 131 dst += 16; | |
| 132 } | |
| 133 } | |
| 134 #endif // !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) | |
| 135 | |
| 136 #ifdef __cplusplus | |
| 137 } // extern "C" | |
| 138 } // namespace libyuv | |
| 139 #endif | |
| OLD | NEW |