Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "libyuv/row.h" | 11 #include "libyuv/row.h" |
| 12 | 12 |
| 13 // This module is for GCC MSA | 13 // This module is for GCC MSA |
| 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
| 15 #include "libyuv/macros_msa.h" | 15 #include "libyuv/macros_msa.h" |
| 16 | 16 |
| 17 #ifdef __cplusplus | 17 #ifdef __cplusplus |
| 18 namespace libyuv { | 18 namespace libyuv { |
| 19 extern "C" { | 19 extern "C" { |
| 20 #endif | 20 #endif |
| 21 | 21 |
| 22 // Fill YUV -> RGB conversion constants into vectors | |
| 23 #define FILLYUVTORGBCONSTS(yuvconst, ub, vr, ug, vg, bb, bg, br, yg) { \ | |
|
fbarchard1
2016/10/27 17:37:14
suggest using the same name as row_gcc: YUVTORGB_S[ETUP]
fbarchard1
2016/10/27 18:22:55
Done.
| |
| 24 ub = __msa_fill_w(yuvconst->kUVToB[0]); \ | |
| 25 vr = __msa_fill_w(yuvconst->kUVToR[1]); \ | |
| 26 ug = __msa_fill_w(yuvconst->kUVToG[0]); \ | |
| 27 vg = __msa_fill_w(yuvconst->kUVToG[1]); \ | |
| 28 bb = __msa_fill_w(yuvconst->kUVBiasB[0]); \ | |
| 29 bg = __msa_fill_w(yuvconst->kUVBiasG[0]); \ | |
| 30 br = __msa_fill_w(yuvconst->kUVBiasR[0]); \ | |
| 31 yg = __msa_fill_w(yuvconst->kYToRgb[0]); \ | |
| 32 } | |
| 33 | |
| 22 // Load YUV 422 pixel data | 34 // Load YUV 422 pixel data |
| 23 #define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) { \ | 35 #define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) { \ |
| 24 uint64 y_m; \ | 36 uint64 y_m; \ |
| 25 uint32 u_m, v_m; \ | 37 uint32 u_m, v_m; \ |
| 26 v4i32 zero_m = { 0 }; \ | 38 v4i32 zero_m = { 0 }; \ |
| 27 y_m = LD(psrc_y); \ | 39 y_m = LD(psrc_y); \ |
| 28 u_m = LW(psrc_u); \ | 40 u_m = LW(psrc_u); \ |
| 29 v_m = LW(psrc_v); \ | 41 v_m = LW(psrc_v); \ |
| 30 out_y = (v16u8) __msa_insert_d((v2i64) zero_m, 0, (int64) y_m); \ | 42 out_y = (v16u8) __msa_insert_d((v2i64) zero_m, 0, (int64) y_m); \ |
| 31 out_u = (v16u8) __msa_insert_w(zero_m, 0, (int32) u_m); \ | 43 out_u = (v16u8) __msa_insert_w(zero_m, 0, (int32) u_m); \ |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 87 reg1_m = __msa_min_s_w(reg1_m, max_val_m); \ | 99 reg1_m = __msa_min_s_w(reg1_m, max_val_m); \ |
| 88 reg2_m = __msa_min_s_w(reg2_m, max_val_m); \ | 100 reg2_m = __msa_min_s_w(reg2_m, max_val_m); \ |
| 89 reg3_m = __msa_min_s_w(reg3_m, max_val_m); \ | 101 reg3_m = __msa_min_s_w(reg3_m, max_val_m); \ |
| 90 reg4_m = __msa_min_s_w(reg4_m, max_val_m); \ | 102 reg4_m = __msa_min_s_w(reg4_m, max_val_m); \ |
| 91 reg5_m = __msa_min_s_w(reg5_m, max_val_m); \ | 103 reg5_m = __msa_min_s_w(reg5_m, max_val_m); \ |
| 92 out_b = __msa_pckev_h((v8i16) reg1_m, (v8i16) reg0_m); \ | 104 out_b = __msa_pckev_h((v8i16) reg1_m, (v8i16) reg0_m); \ |
| 93 out_g = __msa_pckev_h((v8i16) reg3_m, (v8i16) reg2_m); \ | 105 out_g = __msa_pckev_h((v8i16) reg3_m, (v8i16) reg2_m); \ |
| 94 out_r = __msa_pckev_h((v8i16) reg5_m, (v8i16) reg4_m); \ | 106 out_r = __msa_pckev_h((v8i16) reg5_m, (v8i16) reg4_m); \ |
| 95 } | 107 } |
| 96 | 108 |
| 109 // Pack and Store 8 ARGB values. | |
| 110 #define STOREARGB(in0, in1, in2, in3, pdst_argb) { \ | |
| 111 v8i16 vec0_m, vec1_m; \ | |
| 112 v16u8 dst0_m, dst1_m; \ | |
| 113 vec0_m = (v8i16) __msa_ilvev_b((v16i8) in1, (v16i8) in0); \ | |
| 114 vec1_m = (v8i16) __msa_ilvev_b((v16i8) in3, (v16i8) in2); \ | |
| 115 dst0_m = (v16u8) __msa_ilvr_h(vec1_m, vec0_m); \ | |
| 116 dst1_m = (v16u8) __msa_ilvl_h(vec1_m, vec0_m); \ | |
| 117 ST_UB2(dst0_m, dst1_m, pdst_argb, 16); \ | |
| 118 } | |
| 119 | |
| 97 void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { | 120 void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { |
| 98 int x; | 121 int x; |
| 99 v16u8 src0, src1, src2, src3; | 122 v16u8 src0, src1, src2, src3; |
| 100 v16u8 dst0, dst1, dst2, dst3; | 123 v16u8 dst0, dst1, dst2, dst3; |
| 101 v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; | 124 v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; |
| 102 src += width - 64; | 125 src += width - 64; |
| 103 | 126 |
| 104 for (x = 0; x < width; x += 64) { | 127 for (x = 0; x < width; x += 64) { |
| 105 LD_UB4(src, 16, src3, src2, src1, src0); | 128 LD_UB4(src, 16, src3, src2, src1, src0); |
| 106 VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); | 129 VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 173 src_v += 16; | 196 src_v += 16; |
| 174 src_y += 32; | 197 src_y += 32; |
| 175 dst_uyvy += 64; | 198 dst_uyvy += 64; |
| 176 } | 199 } |
| 177 } | 200 } |
| 178 | 201 |
| 179 void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u, | 202 void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u, |
| 180 const uint8* src_v, uint8* rgb_buf, | 203 const uint8* src_v, uint8* rgb_buf, |
| 181 const struct YuvConstants* yuvconstants, int width) { | 204 const struct YuvConstants* yuvconstants, int width) { |
| 182 int x; | 205 int x; |
| 183 v16u8 src0, src1, src2, dst0, dst1; | 206 v16u8 src0, src1, src2; |
| 184 v8i16 vec0, vec1, vec2; | 207 v8i16 vec0, vec1, vec2; |
| 185 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | 208 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| 186 v16u8 const_255 = (v16u8) __msa_ldi_b(255); | 209 v16u8 const_255 = (v16u8) __msa_ldi_b(255); |
| 187 | 210 |
| 188 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); | 211 FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, |
| 189 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); | 212 vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| 190 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); | |
| 191 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); | |
| 192 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); | |
| 193 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); | |
| 194 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); | |
| 195 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); | |
| 196 | 213 |
| 197 for (x = 0; x < width; x += 8) { | 214 for (x = 0; x < width; x += 8) { |
| 198 READYUV422(src_y, src_u, src_v, src0, src1, src2); | 215 READYUV422(src_y, src_u, src_v, src0, src1, src2); |
| 199 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | 216 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |
| 200 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); | 217 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); |
| 201 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0); | 218 STOREARGB(vec0, vec1, vec2, const_255, rgb_buf); |
| 202 vec1 = (v8i16) __msa_ilvev_b((v16i8) const_255, (v16i8) vec2); | |
| 203 dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0); | |
| 204 dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0); | |
| 205 ST_UB2(dst0, dst1, rgb_buf, 16); | |
| 206 src_y += 8; | 219 src_y += 8; |
| 207 src_u += 4; | 220 src_u += 4; |
| 208 src_v += 4; | 221 src_v += 4; |
| 209 rgb_buf += 32; | 222 rgb_buf += 32; |
| 210 } | 223 } |
| 211 } | 224 } |
| 212 | 225 |
| 213 void I422ToRGBARow_MSA(const uint8* src_y, const uint8* src_u, | 226 void I422ToRGBARow_MSA(const uint8* src_y, const uint8* src_u, |
| 214 const uint8* src_v, uint8* rgb_buf, | 227 const uint8* src_v, uint8* rgb_buf, |
| 215 const struct YuvConstants* yuvconstants, int width) { | 228 const struct YuvConstants* yuvconstants, int width) { |
| 216 int x; | 229 int x; |
| 217 v16u8 src0, src1, src2, dst0, dst1; | 230 v16u8 src0, src1, src2; |
| 218 v8i16 vec0, vec1, vec2; | 231 v8i16 vec0, vec1, vec2; |
| 219 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | 232 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| 220 v16u8 const_255 = (v16u8) __msa_ldi_b(255); | 233 v16u8 const_255 = (v16u8) __msa_ldi_b(255); |
| 221 | 234 |
| 222 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); | 235 FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, |
| 223 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); | 236 vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| 224 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); | |
| 225 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); | |
| 226 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); | |
| 227 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); | |
| 228 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); | |
| 229 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); | |
| 230 | 237 |
| 231 for (x = 0; x < width; x += 8) { | 238 for (x = 0; x < width; x += 8) { |
| 232 READYUV422(src_y, src_u, src_v, src0, src1, src2); | 239 READYUV422(src_y, src_u, src_v, src0, src1, src2); |
| 233 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | 240 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |
| 234 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); | 241 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); |
| 235 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec0, (v16i8) const_255); | 242 STOREARGB(const_255, vec0, vec1, vec2, rgb_buf); |
| 236 vec1 = (v8i16) __msa_ilvev_b((v16i8) vec2, (v16i8) vec1); | |
| 237 dst0 = (v16u8) __msa_ilvr_h(vec1, vec0); | |
| 238 dst1 = (v16u8) __msa_ilvl_h(vec1, vec0); | |
| 239 ST_UB2(dst0, dst1, rgb_buf, 16); | |
| 240 src_y += 8; | 243 src_y += 8; |
| 241 src_u += 4; | 244 src_u += 4; |
| 242 src_v += 4; | 245 src_v += 4; |
| 243 rgb_buf += 32; | 246 rgb_buf += 32; |
| 244 } | 247 } |
| 245 } | 248 } |
| 246 | 249 |
| 247 void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u, | 250 void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u, |
| 248 const uint8* src_v, const uint8* src_a, | 251 const uint8* src_v, const uint8* src_a, |
| 249 uint8* rgb_buf, | 252 uint8* rgb_buf, |
| 250 const struct YuvConstants* yuvconstants, | 253 const struct YuvConstants* yuvconstants, |
| 251 int width) { | 254 int width) { |
| 252 int x; | 255 int x; |
| 253 int64 data_a; | 256 int64 data_a; |
| 254 v16u8 src0, src1, src2, src3, dst0, dst1; | 257 v16u8 src0, src1, src2, src3; |
| 255 v8i16 vec0, vec1, vec2; | 258 v8i16 vec0, vec1, vec2; |
| 256 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | 259 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| 257 v4i32 zero = { 0 }; | 260 v4i32 zero = { 0 }; |
| 258 | 261 |
| 259 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); | 262 FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, |
| 260 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); | 263 vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| 261 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); | |
| 262 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); | |
| 263 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); | |
| 264 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); | |
| 265 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); | |
| 266 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); | |
| 267 | 264 |
| 268 for (x = 0; x < width; x += 8) { | 265 for (x = 0; x < width; x += 8) { |
| 269 data_a = LD(src_a); | 266 data_a = LD(src_a); |
| 270 READYUV422(src_y, src_u, src_v, src0, src1, src2); | 267 READYUV422(src_y, src_u, src_v, src0, src1, src2); |
| 271 src3 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_a); | 268 src3 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_a); |
| 272 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | 269 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |
| 273 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); | 270 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); |
| 274 src3 = (v16u8) __msa_ilvr_b((v16i8) src3, (v16i8) src3); | 271 src3 = (v16u8) __msa_ilvr_b((v16i8) src3, (v16i8) src3); |
| 275 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0); | 272 STOREARGB(vec0, vec1, vec2, src3, rgb_buf); |
| 276 vec1 = (v8i16) __msa_ilvev_b((v16i8) src3, (v16i8) vec2); | |
| 277 dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0); | |
| 278 dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0); | |
| 279 ST_UB2(dst0, dst1, rgb_buf, 16); | |
| 280 src_y += 8; | 273 src_y += 8; |
| 281 src_u += 4; | 274 src_u += 4; |
| 282 src_v += 4; | 275 src_v += 4; |
| 283 src_a += 8; | 276 src_a += 8; |
| 284 rgb_buf += 32; | 277 rgb_buf += 32; |
| 285 } | 278 } |
| 286 } | 279 } |
| 287 | 280 |
| 288 void I422ToRGB24Row_MSA(const uint8* src_y, const uint8* src_u, | 281 void I422ToRGB24Row_MSA(const uint8* src_y, const uint8* src_u, |
| 289 const uint8* src_v, uint8* rgb_buf, | 282 const uint8* src_v, uint8* rgb_buf, |
| 290 const struct YuvConstants* yuvconstants, int32 width) { | 283 const struct YuvConstants* yuvconstants, int32 width) { |
| 291 int x; | 284 int x; |
| 292 int64 data_u, data_v; | 285 int64 data_u, data_v; |
| 293 v16u8 src0, src1, src2, src3, src4, src5, dst0, dst1, dst2; | 286 v16u8 src0, src1, src2, src3, src4, src5, dst0, dst1, dst2; |
| 294 v8i16 vec0, vec1, vec2, vec3, vec4, vec5; | 287 v8i16 vec0, vec1, vec2, vec3, vec4, vec5; |
| 295 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | 288 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| 296 v16u8 reg0, reg1, reg2, reg3; | 289 v16u8 reg0, reg1, reg2, reg3; |
| 297 v2i64 zero = { 0 }; | 290 v2i64 zero = { 0 }; |
| 298 v16i8 shuffler0 = { 0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10 }; | 291 v16i8 shuffler0 = { 0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10 }; |
| 299 v16i8 shuffler1 = { 0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10 }; | 292 v16i8 shuffler1 = { 0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10 }; |
| 300 v16i8 shuffler2 = | 293 v16i8 shuffler2 = |
| 301 { 26, 6, 7, 27, 8, 9, 28, 10, 11, 29, 12, 13, 30, 14, 15, 31 }; | 294 { 26, 6, 7, 27, 8, 9, 28, 10, 11, 29, 12, 13, 30, 14, 15, 31 }; |
| 302 | 295 |
| 303 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); | 296 FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, |
| 304 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); | 297 vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| 305 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); | |
| 306 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); | |
| 307 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); | |
| 308 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); | |
| 309 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); | |
| 310 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); | |
| 311 | 298 |
| 312 for (x = 0; x < width; x += 16) { | 299 for (x = 0; x < width; x += 16) { |
| 313 src0 = (v16u8) __msa_ld_b((v16u8*) src_y, 0); | 300 src0 = (v16u8) __msa_ld_b((v16u8*) src_y, 0); |
| 314 data_u = LD(src_u); | 301 data_u = LD(src_u); |
| 315 data_v = LD(src_v); | 302 data_v = LD(src_v); |
| 316 src1 = (v16u8) __msa_insert_d(zero, 0, data_u); | 303 src1 = (v16u8) __msa_insert_d(zero, 0, data_u); |
| 317 src2 = (v16u8) __msa_insert_d(zero, 0, data_v); | 304 src2 = (v16u8) __msa_insert_d(zero, 0, data_v); |
| 318 src3 = (v16u8) __msa_sldi_b((v16i8) src0, (v16i8) src0, 8); | 305 src3 = (v16u8) __msa_sldi_b((v16i8) src0, (v16i8) src0, 8); |
| 319 src4 = (v16u8) __msa_sldi_b((v16i8) src1, (v16i8) src1, 4); | 306 src4 = (v16u8) __msa_sldi_b((v16i8) src1, (v16i8) src1, 4); |
| 320 src5 = (v16u8) __msa_sldi_b((v16i8) src2, (v16i8) src2, 4); | 307 src5 = (v16u8) __msa_sldi_b((v16i8) src2, (v16i8) src2, 4); |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 331 dst2 = (v16u8) __msa_vshf_b(shuffler2, (v16i8) reg3, (v16i8) reg2); | 318 dst2 = (v16u8) __msa_vshf_b(shuffler2, (v16i8) reg3, (v16i8) reg2); |
| 332 ST_UB2(dst0, dst1, rgb_buf, 16); | 319 ST_UB2(dst0, dst1, rgb_buf, 16); |
| 333 ST_UB(dst2, (rgb_buf + 32)); | 320 ST_UB(dst2, (rgb_buf + 32)); |
| 334 src_y += 16; | 321 src_y += 16; |
| 335 src_u += 8; | 322 src_u += 8; |
| 336 src_v += 8; | 323 src_v += 8; |
| 337 rgb_buf += 48; | 324 rgb_buf += 48; |
| 338 } | 325 } |
| 339 } | 326 } |
| 340 | 327 |
| 328 void I422ToRGB565Row_MSA(const uint8* src_y, const uint8* src_u, | |
| 329 const uint8* src_v, uint8* dst_rgb565, | |
| 330 const struct YuvConstants* yuvconstants, int width) { | |
| 331 int x; | |
| 332 v16u8 src0, src1, src2, dst0; | |
| 333 v8i16 vec0, vec1, vec2; | |
| 334 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | |
| 335 | |
| 336 FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, | |
| 337 vec_vg, vec_bb, vec_bg, vec_br, vec_yg); | |
| 338 | |
| 339 for (x = 0; x < width; x += 8) { | |
| 340 READYUV422(src_y, src_u, src_v, src0, src1, src2); | |
| 341 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | |
|
fbarchard1
2016/10/27 17:37:15
Suggest renaming I422ToRGB to YUVTORGB for consist[ency]
fbarchard1
2016/10/27 18:22:55
Done.
| |
| 342 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec2, vec1); | |
| 343 vec0 = __msa_srai_h(vec0, 3); | |
| 344 vec1 = __msa_srai_h(vec1, 3); | |
| 345 vec2 = __msa_srai_h(vec2, 2); | |
| 346 vec1 = __msa_slli_h(vec1, 11); | |
| 347 vec2 = __msa_slli_h(vec2, 5); | |
| 348 vec0 |= vec1; | |
| 349 dst0 = (v16u8) (vec2 | vec0); | |
| 350 ST_UB(dst0, dst_rgb565); | |
| 351 src_y += 8; | |
| 352 src_u += 4; | |
| 353 src_v += 4; | |
| 354 dst_rgb565 += 16; | |
| 355 } | |
| 356 } | |
| 357 | |
| 358 void I422ToARGB4444Row_MSA(const uint8* src_y, const uint8* src_u, | |
| 359 const uint8* src_v, uint8* dst_argb4444, | |
| 360 const struct YuvConstants* yuvconstants, int width) { | |
| 361 int x; | |
| 362 v16u8 src0, src1, src2, dst0; | |
| 363 v8i16 vec0, vec1, vec2; | |
| 364 v8u16 reg0, reg1, reg2; | |
| 365 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | |
| 366 v8u16 const_0xF000 = (v8u16) __msa_fill_h(0xF000); | |
| 367 | |
| 368 FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, | |
| 369 vec_vg, vec_bb, vec_bg, vec_br, vec_yg); | |
| 370 | |
| 371 for (x = 0; x < width; x += 8) { | |
| 372 READYUV422(src_y, src_u, src_v, src0, src1, src2); | |
| 373 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | |
| 374 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); | |
| 375 reg0 = (v8u16) __msa_srai_h(vec0, 4); | |
| 376 reg1 = (v8u16) __msa_srai_h(vec1, 4); | |
|
fbarchard1
2016/10/27 17:37:15
instead of shift right and then left, consider in[...] (comment truncated in the review preview)
| |
| 377 reg2 = (v8u16) __msa_srai_h(vec2, 4); | |
| 378 reg1 = (v8u16) __msa_slli_h((v8i16) reg1, 4); | |
| 379 reg2 = (v8u16) __msa_slli_h((v8i16) reg2, 8); | |
| 380 reg1 |= const_0xF000; | |
| 381 reg0 |= reg2; | |
| 382 dst0 = (v16u8) (reg1 | reg0); | |
| 383 ST_UB(dst0, dst_argb4444); | |
| 384 src_y += 8; | |
| 385 src_u += 4; | |
| 386 src_v += 4; | |
| 387 dst_argb4444 += 16; | |
| 388 } | |
| 389 } | |
| 390 | |
| 391 void I422ToARGB1555Row_MSA(const uint8* src_y, const uint8* src_u, | |
| 392 const uint8* src_v, uint8* dst_argb1555, | |
| 393 const struct YuvConstants* yuvconstants, int width) { | |
| 394 int x; | |
| 395 v16u8 src0, src1, src2, dst0; | |
| 396 v8i16 vec0, vec1, vec2; | |
| 397 v8u16 reg0, reg1, reg2; | |
| 398 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | |
| 399 v8u16 const_0x8000 = (v8u16) __msa_fill_h(0x8000); | |
| 400 | |
| 401 FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, | |
| 402 vec_vg, vec_bb, vec_bg, vec_br, vec_yg); | |
| 403 | |
| 404 for (x = 0; x < width; x += 8) { | |
| 405 READYUV422(src_y, src_u, src_v, src0, src1, src2); | |
| 406 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | |
| 407 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); | |
| 408 reg0 = (v8u16) __msa_srai_h(vec0, 3); | |
| 409 reg1 = (v8u16) __msa_srai_h(vec1, 3); | |
| 410 reg2 = (v8u16) __msa_srai_h(vec2, 3); | |
| 411 reg1 = (v8u16) __msa_slli_h((v8i16) reg1, 5); | |
| 412 reg2 = (v8u16) __msa_slli_h((v8i16) reg2, 10); | |
| 413 reg1 |= const_0x8000; | |
| 414 reg0 |= reg2; | |
| 415 dst0 = (v16u8) (reg1 | reg0); | |
| 416 ST_UB(dst0, dst_argb1555); | |
| 417 src_y += 8; | |
| 418 src_u += 4; | |
| 419 src_v += 4; | |
| 420 dst_argb1555 += 16; | |
| 421 } | |
| 422 } | |
| 423 | |
| 341 void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { | 424 void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { |
| 342 int x; | 425 int x; |
| 343 v16u8 src0, src1, src2, src3, dst0, dst1; | 426 v16u8 src0, src1, src2, src3, dst0, dst1; |
| 344 | 427 |
| 345 for (x = 0; x < width; x += 32) { | 428 for (x = 0; x < width; x += 32) { |
| 346 LD_UB4(src_yuy2, 16, src0, src1, src2, src3); | 429 LD_UB4(src_yuy2, 16, src0, src1, src2, src3); |
| 347 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | 430 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); |
| 348 dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); | 431 dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); |
| 349 ST_UB2(dst0, dst1, dst_y, 16); | 432 ST_UB2(dst0, dst1, dst_y, 16); |
| 350 src_yuy2 += 64; | 433 src_yuy2 += 64; |
| (...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 646 dst_argb += 64; | 729 dst_argb += 64; |
| 647 } | 730 } |
| 648 } | 731 } |
| 649 | 732 |
| 650 #ifdef __cplusplus | 733 #ifdef __cplusplus |
| 651 } // extern "C" | 734 } // extern "C" |
| 652 } // namespace libyuv | 735 } // namespace libyuv |
| 653 #endif | 736 #endif |
| 654 | 737 |
| 655 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 738 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
| OLD | NEW |