Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 | |
| 11 #include "libyuv/row.h" | |
| 12 #include "libyuv/macros_msa.h" | |
|
fbarchard1
2016/08/31 01:20:42
This header won't compile on Intel/ARM as is. May
| |
| 13 | |
| 14 #ifdef __cplusplus | |
| 15 namespace libyuv { | |
| 16 extern "C" { | |
| 17 #endif | |
| 18 | |
| 19 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) | |
| 20 | |
| 21 void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) | |
| 22 { /* Writes the width bytes of src to dst in reverse order. The vector steps assume the LD_UB, VSHF_B2_UB and ST_UB macro families (defined outside this view) load, byte-reverse and store 16-byte vectors - TODO confirm. */ | |
| 23 int32_t count; | |
| 24 uint8_t val0, val1, val2, val3; | |
| 25 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; | |
|
fbarchard1
2016/08/31 01:20:43
prefer uvec8 be defined as v16u8 in row.h if that
| |
| 26 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; | |
| 27 v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* shuffle indices 15..0; presumably selects the 16 bytes of a vector in reverse order */ | |
| 28 int32_t cnt64, cnt32, cnt16; | |
|
fbarchard1
2016/08/31 01:20:43
prefer int32 defined in libyuv/basic_types.h
or if
| |
| 29 /* Point src one past the last input byte; every read below first steps src backwards. */ | |
| 30 src += width; | |
| 31 /* Main loop: width >> 7 iterations, each consuming 128 source bytes and advancing dst by 128. */ | |
| 32 for (count = 0; count < (width >> 7); count++) | |
| 33 { | |
| 34 src -= 128; | |
| 35 LD_UB8(src, 16, src7, src6, src5, src4, src3, src2, src1, src0); | |
| 36 VSHF_B2_UB(src7, src7, src6, src6, mask, mask, dst7, dst6); | |
| 37 VSHF_B2_UB(src5, src5, src4, src4, mask, mask, dst5, dst4); | |
| 38 VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2); | |
| 39 VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0); | |
| 40 ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, 16); | |
| 41 dst += 128; | |
| 42 } | |
| 43 /* Remainder: (width >> 4) & 0x7 is the count of leftover 16-byte chunks (0..7); bits 4, 5 and 6 of width select a 16-, 32- and 64-byte step, taken in that order. */ | |
| 44 if ((width >> 4) & 0x7) | |
|
fbarchard1
2016/08/31 01:20:42
its not necessary to hand odd widths in row functi
| |
| 45 { | |
| 46 cnt64 = (width >> 6) & 0x1; | |
| 47 cnt32 = (width >> 5) & 0x1; | |
| 48 cnt16 = (width >> 4) & 0x1; | |
| 49 | |
| 50 if (cnt16) | |
| 51 { | |
| 52 src -= 16; | |
| 53 src0 = LD_UB(src); | |
| 54 | |
| 55 if (cnt32) | |
| 56 { | |
| 57 src -= 32; | |
| 58 LD_UB2(src, 16, src2, src1); | |
| 59 | |
| 60 if (cnt64) | |
| 61 { /* 16 + 32 + 64 = 112 bytes */ | |
| 62 src -= 64; | |
| 63 LD_UB4(src, 16, src6, src5, src4, src3); | |
| 64 | |
| 65 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, | |
| 66 (v16i8) src0); | |
| 67 VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst2, dst1); | |
| 68 VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst6, dst5); | |
| 69 VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst4, dst3); | |
| 70 | |
| 71 ST_UB(dst0, dst); | |
| 72 dst += 16; | |
| 73 ST_UB2(dst1, dst2, dst, 16); | |
| 74 dst += 32; | |
| 75 ST_UB4(dst3, dst4, dst5, dst6, dst, 16); | |
| 76 dst += 64; | |
| 77 } | |
| 78 else | |
| 79 { /* 16 + 32 = 48 bytes */ | |
| 80 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, | |
| 81 (v16i8) src0); | |
| 82 VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst2, dst1); | |
| 83 | |
| 84 ST_UB(dst0, dst); | |
| 85 dst += 16; | |
| 86 ST_UB2(dst1, dst2, dst, 16); | |
| 87 dst += 32; | |
| 88 } | |
| 89 } | |
| 90 else if (cnt64) | |
| 91 { /* 16 + 64 = 80 bytes */ | |
| 92 src -= 64; | |
| 93 LD_UB4(src, 16, src6, src5, src4, src3); | |
| 94 | |
| 95 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, | |
| 96 (v16i8) src0); | |
| 97 VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst4, dst3); | |
| 98 VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst2, dst1); | |
| 99 | |
| 100 ST_UB(dst0, dst); | |
| 101 dst += 16; | |
| 102 ST_UB4(dst1, dst2, dst3, dst4, dst, 16); | |
| 103 dst += 64; | |
| 104 } | |
| 105 else | |
| 106 { /* 16 bytes only */ | |
| 107 dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, | |
| 108 (v16i8) src0); | |
| 109 | |
| 110 ST_UB(dst0, dst); | |
| 111 dst += 16; | |
| 112 } | |
| 113 } | |
| 114 else if (cnt32) | |
| 115 { | |
| 116 src -= 32; | |
| 117 LD_UB2(src, 16, src2, src1); | |
| 118 | |
| 119 if (cnt64) | |
| 120 { /* 32 + 64 = 96 bytes */ | |
| 121 src -= 64; | |
| 122 LD_UB4(src, 16, src6, src5, src4, src3); | |
| 123 | |
| 124 VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst1, dst0); | |
| 125 VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst5, dst4); | |
| 126 VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst3, dst2); | |
| 127 | |
| 128 ST_UB2(dst0, dst1, dst, 16); | |
| 129 dst += 32; | |
| 130 ST_UB4(dst2, dst3, dst4, dst5, dst, 16); | |
| 131 dst += 64; | |
| 132 } | |
| 133 else | |
| 134 { /* 32 bytes only */ | |
| 135 VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst1, dst0); | |
| 136 | |
| 137 ST_UB2(dst0, dst1, dst, 16); | |
| 138 dst += 32; | |
| 139 } | |
| 140 } | |
| 141 else if (cnt64) | |
| 142 { /* 64 bytes only */ | |
| 143 src -= 64; | |
| 144 LD_UB4(src, 16, src6, src5, src4, src3); | |
| 145 | |
| 146 VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst3, dst2); | |
| 147 VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst1, dst0); | |
| 148 | |
| 149 ST_UB4(dst0, dst1, dst2, dst3, dst, 16); | |
| 150 dst += 64; | |
| 151 } | |
| 152 } | |
| 153 /* Scalar tail, part 1: (width >> 2) & 0x3 groups of 4 bytes, each written to dst in reversed order. */ | |
| 154 for (count = 0; count < ((width >> 2) & 0x3); count++) | |
| 155 { | |
| 156 src -= 4; | |
| 157 val0 = src[3]; | |
| 158 val1 = src[2]; | |
| 159 val2 = src[1]; | |
| 160 val3 = src[0]; | |
| 161 dst[0] = val0; | |
| 162 dst[1] = val1; | |
| 163 dst[2] = val2; | |
| 164 dst[3] = val3; | |
| 165 dst += 4; | |
| 166 } | |
| 167 /* Scalar tail, part 2: the last width & 0x3 bytes, copied one at a time from the back of src. */ | |
| 168 for (count = 0; count < (width & 0x3); count++) | |
| 169 { | |
| 170 val0 = src[-1]; | |
| 171 src--; | |
| 172 dst[0] = val0; | |
| 173 dst++; | |
| 174 } | |
| 175 } | |
| 176 #endif // !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) | |
| 177 | |
| 178 #ifdef __cplusplus | |
| 179 } // extern "C" | |
| 180 } // namespace libyuv | |
| 181 #endif | |
| OLD | NEW |