Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 203 matching lines...) | |
| 214 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | 214 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); |
| 215 dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); | 215 dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); |
| 216 ST_UB(dst0, dst_u); | 216 ST_UB(dst0, dst_u); |
| 217 ST_UB(dst1, dst_v); | 217 ST_UB(dst1, dst_v); |
| 218 src_uyvy += 64; | 218 src_uyvy += 64; |
| 219 dst_u += 16; | 219 dst_u += 16; |
| 220 dst_v += 16; | 220 dst_v += 16; |
| 221 } | 221 } |
| 222 } | 222 } |
| 223 | 223 |
| 224 void ARGBToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { | |
| 225 int x; | |
| 226 v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0; | |
| 227 v8u16 reg0, reg1, reg2, reg3, reg4, reg5; | |
| 228 v16i8 zero = { 0 }; | |
| 229 v8u16 const_0x19 = (v8u16) __msa_ldi_h(0x19); | |
| 230 v8u16 const_0x81 = (v8u16) __msa_ldi_h(0x81); | |
| 231 v8u16 const_0x42 = (v8u16) __msa_ldi_h(0x42); | |
| 232 v8u16 const_0x1080 = (v8u16) __msa_fill_h(0x1080); | |
| 233 | |
| 234 for (x = 0; x < width; x += 16) { | |
| 235 src0 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 0); | |
| 236 src1 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 16); | |
| 237 src2 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 32); | |
| 238 src3 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 48); | |
| 239 vec0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | |
| 240 vec1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); | |
| 241 vec2 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); | |
| 242 vec3 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2); | |
| 243 reg0 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec0); | |
| 244 reg1 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec1); | |
| 245 reg2 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec2); | |
| 246 reg3 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec3); | |
| 247 reg4 = (v8u16) __msa_ilvod_b(zero, (v16i8) vec0); | |
| 248 reg5 = (v8u16) __msa_ilvod_b(zero, (v16i8) vec1); | |
| 249 reg0 *= const_0x19; | |
| 250 reg1 *= const_0x19; | |
| 251 reg2 *= const_0x81; | |
| 252 reg3 *= const_0x81; | |
| 253 reg4 *= const_0x42; | |
| 254 reg5 *= const_0x42; | |
| 255 reg0 += reg2; | |
| 256 reg1 += reg3; | |
| 257 reg0 += reg4; | |
| 258 reg1 += reg5; | |
| 259 reg0 += const_0x1080; | |
| 260 reg1 += const_0x1080; | |
| 261 reg0 = (v8u16) __msa_srai_h((v8i16) reg0, 8); | |
| 262 reg1 = (v8u16) __msa_srai_h((v8i16) reg1, 8); | |
| 263 dst0 = (v16u8) __msa_pckev_b((v16i8) reg1, (v16i8) reg0); | |
| 264 ST_UB(dst0, dst_y); | |
| 265 src_argb0 += 64; | |
| 266 dst_y += 16; | |
| 267 } | |
| 268 } | |
| 269 | |
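ARGBToYRow_MSA above is the vector form of the BT.601 luma equation: the pckev/pckod and ilvev/ilvod shuffles deinterleave the B,G,R,A bytes so the 0x19, 0x81 and 0x42 multipliers land on B, G and R, and 0x1080 = (16 << 8) + 128 folds the luma offset and the rounding term into one add before the shift. A minimal scalar sketch of the same arithmetic follows; the helper name is hypothetical and not part of the patch:

#include <stdint.h>

/* Scalar sketch of the per-pixel math ARGBToYRow_MSA vectorizes
 * (hypothetical helper, not part of the patch). ARGB is stored
 * little-endian as B,G,R,A bytes. */
static void ARGBToYRow_Scalar(const uint8_t* src_argb, uint8_t* dst_y,
                              int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    /* Y = (25*B + 129*G + 66*R + 0x1080) >> 8, BT.601 studio range. */
    dst_y[x] = (uint8_t)((25 * b + 129 * g + 66 * r + 0x1080) >> 8);
    src_argb += 4;
  }
}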
| 270 void ARGBToUVRow_MSA(const uint8* src_argb0, int src_stride_argb, | |

fbarchard1 (2016/10/19 18:10:33): this is kinda HUGE! But I see it does a 16 byte s

| 271 uint8* dst_u, uint8* dst_v, int width) { | |
| 272 int x; | |
| 273 const uint8* src_argb0_next = src_argb0 + src_stride_argb; | |
| 274 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; | |
| 275 v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; | |
| 276 v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; | |
| 277 v16u8 dst0, dst1; | |
| 278 v8u16 const_0x70 = (v8u16) __msa_ldi_h(0x70); | |
| 279 v8u16 const_0x4A = (v8u16) __msa_ldi_h(0x4A); | |
| 280 v8u16 const_0x26 = (v8u16) __msa_ldi_h(0x26); | |
| 281 v8u16 const_0x5E = (v8u16) __msa_ldi_h(0x5E); | |
| 282 v8u16 const_0x12 = (v8u16) __msa_ldi_h(0x12); | |
| 283 v8u16 const_0x8080 = (v8u16) __msa_fill_h(0x8080); | |
| 284 | |
| 285 for (x = 0; x < width; x += 32) { | |
| 286 src0 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 0); | |
| 287 src1 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 16); | |
| 288 src2 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 32); | |
| 289 src3 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 48); | |
| 290 src4 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 64); | |
| 291 src5 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 80); | |
| 292 src6 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 96); | |
| 293 src7 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 112); | |
| 294 vec0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | |
| 295 vec1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); | |
| 296 vec2 = (v16u8) __msa_pckev_b((v16i8) src5, (v16i8) src4); | |
| 297 vec3 = (v16u8) __msa_pckev_b((v16i8) src7, (v16i8) src6); | |
| 298 vec4 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); | |
| 299 vec5 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2); | |
| 300 vec6 = (v16u8) __msa_pckod_b((v16i8) src5, (v16i8) src4); | |
| 301 vec7 = (v16u8) __msa_pckod_b((v16i8) src7, (v16i8) src6); | |
| 302 vec8 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0); | |
| 303 vec9 = (v16u8) __msa_pckev_b((v16i8) vec3, (v16i8) vec2); | |
| 304 vec4 = (v16u8) __msa_pckev_b((v16i8) vec5, (v16i8) vec4); | |
| 305 vec5 = (v16u8) __msa_pckev_b((v16i8) vec7, (v16i8) vec6); | |
| 306 vec0 = (v16u8) __msa_pckod_b((v16i8) vec1, (v16i8) vec0); | |
| 307 vec1 = (v16u8) __msa_pckod_b((v16i8) vec3, (v16i8) vec2); | |
| 308 reg0 = __msa_hadd_u_h(vec8, vec8); | |
| 309 reg1 = __msa_hadd_u_h(vec9, vec9); | |
| 310 reg2 = __msa_hadd_u_h(vec4, vec4); | |
| 311 reg3 = __msa_hadd_u_h(vec5, vec5); | |
| 312 reg4 = __msa_hadd_u_h(vec0, vec0); | |
| 313 reg5 = __msa_hadd_u_h(vec1, vec1); | |
| 314 src0 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 0); | |
| 315 src1 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 16); | |
| 316 src2 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 32); | |
| 317 src3 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 48); | |
| 318 src4 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 64); | |
| 319 src5 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 80); | |
| 320 src6 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 96); | |
| 321 src7 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 112); | |
| 322 vec0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | |
| 323 vec1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); | |
| 324 vec2 = (v16u8) __msa_pckev_b((v16i8) src5, (v16i8) src4); | |
| 325 vec3 = (v16u8) __msa_pckev_b((v16i8) src7, (v16i8) src6); | |
| 326 vec4 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); | |
| 327 vec5 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2); | |
| 328 vec6 = (v16u8) __msa_pckod_b((v16i8) src5, (v16i8) src4); | |
| 329 vec7 = (v16u8) __msa_pckod_b((v16i8) src7, (v16i8) src6); | |
| 330 vec8 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0); | |
| 331 vec9 = (v16u8) __msa_pckev_b((v16i8) vec3, (v16i8) vec2); | |
| 332 vec4 = (v16u8) __msa_pckev_b((v16i8) vec5, (v16i8) vec4); | |
| 333 vec5 = (v16u8) __msa_pckev_b((v16i8) vec7, (v16i8) vec6); | |
| 334 vec0 = (v16u8) __msa_pckod_b((v16i8) vec1, (v16i8) vec0); | |
| 335 vec1 = (v16u8) __msa_pckod_b((v16i8) vec3, (v16i8) vec2); | |
| 336 reg0 += __msa_hadd_u_h(vec8, vec8); | |
| 337 reg1 += __msa_hadd_u_h(vec9, vec9); | |
| 338 reg2 += __msa_hadd_u_h(vec4, vec4); | |
| 339 reg3 += __msa_hadd_u_h(vec5, vec5); | |
| 340 reg4 += __msa_hadd_u_h(vec0, vec0); | |
| 341 reg5 += __msa_hadd_u_h(vec1, vec1); | |
| 342 reg0 = (v8u16) __msa_srai_h((v8i16) reg0, 2); | |
| 343 reg1 = (v8u16) __msa_srai_h((v8i16) reg1, 2); | |
| 344 reg2 = (v8u16) __msa_srai_h((v8i16) reg2, 2); | |
| 345 reg3 = (v8u16) __msa_srai_h((v8i16) reg3, 2); | |
| 346 reg4 = (v8u16) __msa_srai_h((v8i16) reg4, 2); | |
| 347 reg5 = (v8u16) __msa_srai_h((v8i16) reg5, 2); | |
| 348 reg6 = reg0 * const_0x70; | |
| 349 reg7 = reg1 * const_0x70; | |
| 350 reg8 = reg2 * const_0x4A; | |
| 351 reg9 = reg3 * const_0x4A; | |
| 352 reg6 += const_0x8080; | |
| 353 reg7 += const_0x8080; | |
| 354 reg8 += reg4 * const_0x26; | |
| 355 reg9 += reg5 * const_0x26; | |
| 356 reg0 *= const_0x12; | |
| 357 reg1 *= const_0x12; | |
| 358 reg2 *= const_0x5E; | |
| 359 reg3 *= const_0x5E; | |
| 360 reg4 *= const_0x70; | |
| 361 reg5 *= const_0x70; | |
| 362 reg2 += reg0; | |
| 363 reg3 += reg1; | |
| 364 reg4 += const_0x8080; | |
| 365 reg5 += const_0x8080; | |
| 366 reg6 -= reg8; | |
| 367 reg7 -= reg9; | |
| 368 reg4 -= reg2; | |
| 369 reg5 -= reg3; | |
| 370 reg6 = (v8u16) __msa_srai_h((v8i16) reg6, 8); | |
| 371 reg7 = (v8u16) __msa_srai_h((v8i16) reg7, 8); | |
| 372 reg4 = (v8u16) __msa_srai_h((v8i16) reg4, 8); | |
| 373 reg5 = (v8u16) __msa_srai_h((v8i16) reg5, 8); | |
| 374 dst0 = (v16u8) __msa_pckev_b((v16i8) reg7, (v16i8) reg6); | |
| 375 dst1 = (v16u8) __msa_pckev_b((v16i8) reg5, (v16i8) reg4); | |
| 376 ST_UB(dst0, dst_u); | |
| 377 ST_UB(dst1, dst_v); | |
| 378 src_argb0 += 128; | |
| 379 src_argb0_next += 128; | |
| 380 dst_u += 16; | |
| 381 dst_v += 16; | |
| 382 } | |
| 383 } | |
| 384 | |
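On the reviewer's point that ARGBToUVRow_MSA is large: the size comes from producing 16 U and 16 V samples per iteration, i.e. 32 pixels across two rows, because each chroma sample averages a 2x2 block (__msa_hadd_u_h sums horizontal pixel pairs, the second-row loads accumulate into the same registers, and the srai by 2 divides by 4) before applying the BT.601 chroma equations with the 0x8080 bias supplying the +128 offset and rounding. A scalar sketch of the per-block math, with a hypothetical helper name not part of the patch:

#include <stdint.h>

/* Scalar sketch of what ARGBToUVRow_MSA computes per 2x2 block
 * (hypothetical helper, not part of the patch). */
static void ARGBToUVRow_Scalar(const uint8_t* src_argb, int src_stride_argb,
                               uint8_t* dst_u, uint8_t* dst_v, int width) {
  const uint8_t* next = src_argb + src_stride_argb;
  int x;
  for (x = 0; x < width; x += 2) {
    /* Average each channel over two pixels on each of two rows. */
    uint16_t b = (uint16_t)((src_argb[0] + src_argb[4] + next[0] + next[4]) >> 2);
    uint16_t g = (uint16_t)((src_argb[1] + src_argb[5] + next[1] + next[5]) >> 2);
    uint16_t r = (uint16_t)((src_argb[2] + src_argb[6] + next[2] + next[6]) >> 2);
    /* U = (112*B - 74*G - 38*R + 0x8080) >> 8
     * V = (112*R - 94*G - 18*B + 0x8080) >> 8
     * matching the 0x70/0x4A/0x26/0x5E/0x12 constants above. */
    *dst_u++ = (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
    *dst_v++ = (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
    src_argb += 8;
    next += 8;
  }
}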
| 385 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, uint8* dst_argb, | |
| 386 int width) { | |
| 387 int x; | |
| 388 v16u8 src0, src1; | |
| 389 v8u16 vec0, vec1, vec2, vec3; | |
| 390 v16u8 dst0, dst1, dst2, dst3; | |
| 391 | |
| 392 for (x = 0; x < width; x += 16) { | |
| 393 src0 = (v16u8) __msa_ld_b((v16u8*) src_argb4444, 0); | |
| 394 src1 = (v16u8) __msa_ld_b((v16u8*) src_argb4444, 16); | |
| 395 vec0 = (v8u16) __msa_andi_b(src0, 0x0F); | |
| 396 vec1 = (v8u16) __msa_andi_b(src1, 0x0F); | |
| 397 vec2 = (v8u16) __msa_andi_b(src0, 0xF0); | |
| 398 vec3 = (v8u16) __msa_andi_b(src1, 0xF0); | |
| 399 vec0 |= (v8u16) __msa_slli_b((v16i8) vec0, 4); | |
| 400 vec1 |= (v8u16) __msa_slli_b((v16i8) vec1, 4); | |
| 401 vec2 |= (v8u16) __msa_srli_b((v16i8) vec2, 4); | |
| 402 vec3 |= (v8u16) __msa_srli_b((v16i8) vec3, 4); | |
| 403 dst0 = (v16u8) __msa_ilvr_b((v16i8) vec2, (v16i8) vec0); | |
| 404 dst1 = (v16u8) __msa_ilvl_b((v16i8) vec2, (v16i8) vec0); | |
| 405 dst2 = (v16u8) __msa_ilvr_b((v16i8) vec3, (v16i8) vec1); | |
| 406 dst3 = (v16u8) __msa_ilvl_b((v16i8) vec3, (v16i8) vec1); | |
| 407 ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); | |
| 408 src_argb4444 += 32; | |
| 409 dst_argb += 64; | |
| 410 } | |
| 411 } | |
| 412 | |
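ARGB4444ToARGBRow_MSA expands each 4-bit channel to 8 bits by replicating the nibble into both halves of the byte (the andi/slli/srli/or sequence), which is an exact multiply by 17, so 0x0 maps to 0x00 and 0xF to 0xFF; the ilvr_b/ilvl_b interleaves then restore B,G,R,A byte order. A scalar sketch, with a hypothetical helper name not part of the patch:

#include <stdint.h>

/* Scalar sketch of the nibble expansion ARGB4444ToARGBRow_MSA performs
 * (hypothetical helper, not part of the patch). */
static void ARGB4444ToARGBRow_Scalar(const uint8_t* src_argb4444,
                                     uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t bg = src_argb4444[0];  /* low nibble B, high nibble G */
    uint8_t ra = src_argb4444[1];  /* low nibble R, high nibble A */
    dst_argb[0] = (uint8_t)((bg & 0x0F) * 0x11);  /* B: nibble * 17 */
    dst_argb[1] = (uint8_t)((bg >> 4) * 0x11);    /* G */
    dst_argb[2] = (uint8_t)((ra & 0x0F) * 0x11);  /* R */
    dst_argb[3] = (uint8_t)((ra >> 4) * 0x11);    /* A */
    src_argb4444 += 2;
    dst_argb += 4;
  }
}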
| 224 #ifdef __cplusplus | 413 #ifdef __cplusplus |
| 225 } // extern "C" | 414 } // extern "C" |
| 226 } // namespace libyuv | 415 } // namespace libyuv |
| 227 #endif | 416 #endif |
| 228 | 417 |
| 229 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 418 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |