Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 203 matching lines...) | |
| 214 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | 214 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); |
| 215 dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); | 215 dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); |
| 216 ST_UB(dst0, dst_u); | 216 ST_UB(dst0, dst_u); |
| 217 ST_UB(dst1, dst_v); | 217 ST_UB(dst1, dst_v); |
| 218 src_uyvy += 64; | 218 src_uyvy += 64; |
| 219 dst_u += 16; | 219 dst_u += 16; |
| 220 dst_v += 16; | 220 dst_v += 16; |
| 221 } | 221 } |
| 222 } | 222 } |
| 223 | 223 |
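For reference, pckev_b keeps the even-indexed bytes of its operands and pckod_b the odd-indexed ones, which is how the tail of the loop above splits interleaved U/V bytes into separate planes. A minimal scalar sketch of a single-row UYVY U/V deinterleave (the function name and signature here are illustrative, assuming the non-averaging 422-style variant):

```c
#include <stdint.h>

/* Scalar model (illustrative): each UYVY macropixel is four bytes
 * {U, Y0, V, Y1} covering two horizontal luma samples. */
static void UYVYToUVRow_Scalar(const uint8_t* src_uyvy,
                               uint8_t* dst_u, uint8_t* dst_v, int width) {
  int x;
  for (x = 0; x < width; x += 2) { /* two pixels per macropixel */
    *dst_u++ = src_uyvy[0];
    *dst_v++ = src_uyvy[2];
    src_uyvy += 4;
  }
}
```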
| 224 void ARGB4444ToYRow_MSA(const uint8* src_argb4444, uint8* dst_y, int width) { | |
| 225 int x; | |
| 226 const uint16* src_argb4444_u16 = (uint16*) src_argb4444; | |
| 227 v8u16 src0, src1; | |
| 228 v8u16 vec0, vec1, vec2, vec3, vec4, vec5; | |
| 229 v16u8 dst0; | |
| 230 v8u16 const_0x19 = (v8u16) __msa_ldi_h(0x19); | |
| 231 v8u16 const_0x81 = (v8u16) __msa_ldi_h(0x81); | |
| 232 v8u16 const_0x42 = (v8u16) __msa_ldi_h(0x42); | |
| 233 v8u16 const_0x1080 = (v8u16) __msa_fill_h(0x1080); | |
| 234 v8u16 const_0x0F = (v8u16) __msa_ldi_h(0x0F); | |
| 235 | |
| 236 for (x = 0; x < width; x += 16) { | |
| 237 LD_UH2(src_argb4444_u16, 8, src0, src1); | |
| 238 vec0 = src0 & const_0x0F; | |
| 239 vec1 = src1 & const_0x0F; | |
| 240 src0 = (v8u16) __msa_srai_h((v8i16) src0, 4); | |
| 241 src1 = (v8u16) __msa_srai_h((v8i16) src1, 4); | |
| 242 vec2 = src0 & const_0x0F; | |
| 243 vec3 = src1 & const_0x0F; | |
| 244 src0 = (v8u16) __msa_srai_h((v8i16) src0, 4); | |
| 245 src1 = (v8u16) __msa_srai_h((v8i16) src1, 4); | |
| 246 vec4 = src0 & const_0x0F; | |
| 247 vec5 = src1 & const_0x0F; | |
| 248 vec0 |= (v8u16) __msa_slli_h((v8i16) vec0, 4); | |
| 249 vec1 |= (v8u16) __msa_slli_h((v8i16) vec1, 4); | |
| 250 vec2 |= (v8u16) __msa_slli_h((v8i16) vec2, 4); | |
| 251 vec3 |= (v8u16) __msa_slli_h((v8i16) vec3, 4); | |
| 252 vec4 |= (v8u16) __msa_slli_h((v8i16) vec4, 4); | |
| 253 vec5 |= (v8u16) __msa_slli_h((v8i16) vec5, 4); | |
| 254 vec0 *= const_0x19; | |
|
fbarchard1
2016/10/14 21:35:16
FYI: the YUV to RGB functions now take constants as an argument.
manojkumar.bhosale
2016/10/19 11:56:27
OK. Will fix them when the change happens.
| |
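For context on that comment, a sketch of the "constants as an argument" style the YUV-to-RGB row functions moved to. The prototype shape below matches the upstream C row functions; the struct itself is left opaque here because its real layout in libyuv's row.h varies per architecture:

```c
#include <stdint.h>

/* Coefficients arrive via a pointer parameter instead of being
 * hard-coded inside each row function. */
struct YuvConstants; /* defined per-architecture in libyuv's row.h */

void I422ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width);
```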
| 255 vec1 *= const_0x19; | |
| 256 vec2 *= const_0x81; | |
| 257 vec3 *= const_0x81; | |
| 258 vec4 *= const_0x42; | |
| 259 vec5 *= const_0x42; | |
| 260 vec0 += vec2; | |
| 261 vec1 += vec3; | |
| 262 vec0 += vec4; | |
| 263 vec1 += vec5; | |
| 264 vec0 += const_0x1080; | |
| 265 vec1 += const_0x1080; | |
| 266 vec0 = (v8u16) __msa_srai_h((v8i16) vec0, 8); | |
| 267 vec1 = (v8u16) __msa_srai_h((v8i16) vec1, 8); | |
| 268 dst0 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0); | |
| 269 ST_UB(dst0, dst_y); | |
| 270 src_argb4444_u16 += 16; | |
| 271 dst_y += 16; | |
| 272 } | |
| 273 } | |
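To follow the constants above: 0x42, 0x81, and 0x19 are the BT.601 luma weights 66, 129, and 25 for R, G, and B, and 0x1080 folds the +16 luma offset (0x10 << 8) together with 0x80 for rounding before the final >> 8. The slli/OR pairs replicate each 4-bit channel into 8 bits, since (x << 4) | x == x * 0x11. A scalar sketch of what one lane computes (function name illustrative):

```c
#include <stdint.h>

/* Scalar model of one lane of ARGB4444ToYRow_MSA (illustrative).
 * An ARGB4444 pixel packs B in bits 0-3, G in 4-7, R in 8-11, A in 12-15. */
static uint8_t ARGB4444PixelToY(uint16_t pixel) {
  uint8_t b = (pixel & 0x0F) * 0x11;        /* replicate nibble: (x<<4)|x */
  uint8_t g = ((pixel >> 4) & 0x0F) * 0x11;
  uint8_t r = ((pixel >> 8) & 0x0F) * 0x11;
  /* BT.601: Y = (66*R + 129*G + 25*B + 0x1080) >> 8. */
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}
```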
| 274 | |
| 275 void ARGB4444ToUVRow_MSA(const uint8* src_argb4444, | |
| 276 int src_stride_argb4444, | |
| 277 uint8* dst_u, uint8* dst_v, int width) { | |
| 278 int x; | |
| 279 const uint8* src_argb4444_next = src_argb4444 + src_stride_argb4444; | |
| 280 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; | |
| 281 v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; | |
| 282 v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; | |
| 283 v16u8 dst0, dst1; | |
| 284 v8u16 const_0x70 = (v8u16) __msa_ldi_h(0x70); | |
| 285 v8u16 const_0x4A = (v8u16) __msa_ldi_h(0x4A); | |
| 286 v8u16 const_0x26 = (v8u16) __msa_ldi_h(0x26); | |
| 287 v8u16 const_0x5E = (v8u16) __msa_ldi_h(0x5E); | |
| 288 v8u16 const_0x12 = (v8u16) __msa_ldi_h(0x12); | |
| 289 v8u16 const_0x8080 = (v8u16) __msa_fill_h(0x8080); | |
| 290 | |
| 291 for (x = 0; x < width; x += 32) { | |
| 292 LD_UB4(src_argb4444, 16, src0, src1, src2, src3); | |
| 293 LD_UB4(src_argb4444_next, 16, src4, src5, src6, src7); | |
| 294 reg0 = __msa_andi_b(src0, 0x0F); | |
| 295 reg1 = __msa_andi_b(src1, 0x0F); | |
| 296 reg2 = __msa_andi_b(src2, 0x0F); | |
| 297 reg3 = __msa_andi_b(src3, 0x0F); | |
| 298 reg0 += __msa_andi_b(src4, 0x0F); | |
| 299 reg1 += __msa_andi_b(src5, 0x0F); | |
| 300 reg2 += __msa_andi_b(src6, 0x0F); | |
| 301 reg3 += __msa_andi_b(src7, 0x0F); | |
| 302 src0 = __msa_andi_b(src0, 0xF0); | |
| 303 src1 = __msa_andi_b(src1, 0xF0); | |
| 304 src2 = __msa_andi_b(src2, 0xF0); | |
| 305 src3 = __msa_andi_b(src3, 0xF0); | |
| 306 src4 = __msa_andi_b(src4, 0xF0); | |
| 307 src5 = __msa_andi_b(src5, 0xF0); | |
| 308 src6 = __msa_andi_b(src6, 0xF0); | |
| 309 src7 = __msa_andi_b(src7, 0xF0); | |
| 310 reg4 = (v16u8) __msa_srli_b((v16i8) src0, 4); | |
| 311 reg5 = (v16u8) __msa_srli_b((v16i8) src1, 4); | |
| 312 reg6 = (v16u8) __msa_srli_b((v16i8) src2, 4); | |
| 313 reg7 = (v16u8) __msa_srli_b((v16i8) src3, 4); | |
| 314 reg4 += (v16u8) __msa_srli_b((v16i8) src4, 4); | |
| 315 reg5 += (v16u8) __msa_srli_b((v16i8) src5, 4); | |
| 316 reg6 += (v16u8) __msa_srli_b((v16i8) src6, 4); | |
| 317 reg7 += (v16u8) __msa_srli_b((v16i8) src7, 4); | |
| 318 reg8 = (v16u8) __msa_pckod_b((v16i8) reg1, (v16i8) reg0); | |
| 319 reg9 = (v16u8) __msa_pckod_b((v16i8) reg3, (v16i8) reg2); | |
| 320 reg0 = (v16u8) __msa_pckev_b((v16i8) reg1, (v16i8) reg0); | |
| 321 reg1 = (v16u8) __msa_pckev_b((v16i8) reg3, (v16i8) reg2); | |
| 322 reg2 = (v16u8) __msa_pckev_b((v16i8) reg5, (v16i8) reg4); | |
| 323 reg3 = (v16u8) __msa_pckev_b((v16i8) reg7, (v16i8) reg6); | |
| 324 vec0 = __msa_hadd_u_h(reg0, reg0); | |
| 325 vec1 = __msa_hadd_u_h(reg1, reg1); | |
| 326 vec2 = __msa_hadd_u_h(reg2, reg2); | |
| 327 vec3 = __msa_hadd_u_h(reg3, reg3); | |
| 328 vec4 = __msa_hadd_u_h(reg8, reg8); | |
| 329 vec5 = __msa_hadd_u_h(reg9, reg9); | |
| 330 vec0 = (v8u16) __msa_slli_h((v8i16) vec0, 2); | |
| 331 vec1 = (v8u16) __msa_slli_h((v8i16) vec1, 2); | |
| 332 vec2 = (v8u16) __msa_slli_h((v8i16) vec2, 2); | |
| 333 vec3 = (v8u16) __msa_slli_h((v8i16) vec3, 2); | |
| 334 vec4 = (v8u16) __msa_slli_h((v8i16) vec4, 2); | |
| 335 vec5 = (v8u16) __msa_slli_h((v8i16) vec5, 2); | |
| 336 vec0 |= (v8u16) __msa_srai_h((v8i16) vec0, 6); | |
| 337 vec1 |= (v8u16) __msa_srai_h((v8i16) vec1, 6); | |
| 338 vec2 |= (v8u16) __msa_srai_h((v8i16) vec2, 6); | |
| 339 vec3 |= (v8u16) __msa_srai_h((v8i16) vec3, 6); | |
| 340 vec4 |= (v8u16) __msa_srai_h((v8i16) vec4, 6); | |
| 341 vec5 |= (v8u16) __msa_srai_h((v8i16) vec5, 6); | |
|
fbarchard1
2016/10/14 21:35:16
I'm concerned that this is a lot of code for a for loop.
manojkumar.bhosale
2016/10/19 11:56:27
Done.
| |
| 342 vec6 = vec0 * const_0x70; | |
| 343 vec7 = vec1 * const_0x70; | |
| 344 vec8 = vec2 * const_0x4A; | |
| 345 vec9 = vec3 * const_0x4A; | |
| 346 vec0 *= const_0x12; | |
| 347 vec1 *= const_0x12; | |
| 348 vec2 *= const_0x5E; | |
| 349 vec3 *= const_0x5E; | |
| 350 vec6 += const_0x8080; | |
| 351 vec7 += const_0x8080; | |
| 352 vec8 += vec4 * const_0x26; | |
| 353 vec9 += vec5 * const_0x26; | |
| 354 vec4 *= const_0x70; | |
| 355 vec5 *= const_0x70; | |
| 356 vec2 += vec0; | |
| 357 vec3 += vec1; | |
| 358 vec4 += const_0x8080; | |
| 359 vec5 += const_0x8080; | |
| 360 vec0 = vec6 - vec8; | |
| 361 vec1 = vec7 - vec9; | |
| 362 vec2 = vec4 - vec2; | |
| 363 vec3 = vec5 - vec3; | |
| 364 vec0 = (v8u16) __msa_srli_h((v8i16) vec0, 8); | |
| 365 vec1 = (v8u16) __msa_srli_h((v8i16) vec1, 8); | |
| 366 vec2 = (v8u16) __msa_srli_h((v8i16) vec2, 8); | |
| 367 vec3 = (v8u16) __msa_srli_h((v8i16) vec3, 8); | |
| 368 dst0 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0); | |
| 369 dst1 = (v16u8) __msa_pckev_b((v16i8) vec3, (v16i8) vec2); | |
| 370 ST_UB(dst0, dst_u); | |
| 371 ST_UB(dst1, dst_v); | |
| 372 src_argb4444 += 64; | |
| 373 src_argb4444_next += 64; | |
|
fbarchard1
2016/10/14 21:35:16
On other platforms I'd typically unroll less than this.
| |
| 374 dst_u += 16; | |
| 375 dst_v += 16; | |
| 376 } | |
| 377 } | |
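The UV path first sums each 4-bit channel over a 2x2 block (andi/srli to isolate nibbles, adds across the two rows, hadd_u_h across columns), then the slli-by-2 / srai-by-6 pair turns the sum of four nibbles into an approximately nibble-replicated 8-bit average. The multipliers are the BT.601 chroma weights: 0x70 = 112, 0x4A = 74, 0x26 = 38, 0x5E = 94, 0x12 = 18, with 0x8080 supplying the +128 offset plus rounding. A scalar sketch of one output pair (names illustrative):

```c
#include <stdint.h>

/* Scalar model of one U/V output of ARGB4444ToUVRow_MSA (illustrative).
 * b, g, r are the 8-bit 2x2 averages recovered from the 4-bit channels. */
static void AverageToUV(uint8_t b, uint8_t g, uint8_t r,
                        uint8_t* u, uint8_t* v) {
  /* BT.601 chroma: U = 112*B - 74*G - 38*R, V = 112*R - 94*G - 18*B,
   * each biased by 0x8080 and reduced by >> 8. */
  *u = (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
  *v = (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}
```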
| 378 | |
| 379 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, uint8* dst_argb, | |
| 380 int width) { | |
| 381 int x; | |
| 382 v16u8 src0, src1; | |
| 383 v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; | |
| 384 v16u8 dst0, dst1, dst2, dst3; | |
| 385 | |
| 386 for (x = 0; x < width; x += 16) { | |
| 387 LD_UB2(src_argb4444, 16, src0, src1); | |
| 388 vec0 = (v8u16) __msa_andi_b(src0, 0x0F); | |
| 389 vec1 = (v8u16) __msa_andi_b(src1, 0x0F); | |
| 390 vec2 = (v8u16) __msa_andi_b(src0, 0xF0); | |
| 391 vec3 = (v8u16) __msa_andi_b(src1, 0xF0); | |
| 392 vec4 = (v8u16) __msa_slli_b((v16i8) vec0, 4); | |
| 393 vec5 = (v8u16) __msa_slli_b((v16i8) vec1, 4); | |
| 394 vec6 = (v8u16) __msa_srli_b((v16i8) vec2, 4); | |
| 395 vec7 = (v8u16) __msa_srli_b((v16i8) vec3, 4); | |
| 396 vec0 |= vec4; | |
| 397 vec1 |= vec5; | |
| 398 vec2 |= vec6; | |
| 399 vec3 |= vec7; | |
| 400 dst0 = (v16u8) __msa_ilvr_b((v16i8) vec2, (v16i8) vec0); | |
| 401 dst1 = (v16u8) __msa_ilvl_b((v16i8) vec2, (v16i8) vec0); | |
| 402 dst2 = (v16u8) __msa_ilvr_b((v16i8) vec3, (v16i8) vec1); | |
| 403 dst3 = (v16u8) __msa_ilvl_b((v16i8) vec3, (v16i8) vec1); | |
| 404 ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); | |
| 405 src_argb4444 += 32; | |
| 406 dst_argb += 64; | |
| 407 } | |
| 408 } | |
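ARGB4444ToARGBRow_MSA uses the same nibble replication, then ilvr_b/ilvl_b interleave the widened low-nibble channels (B, R) with the high-nibble channels (G, A) to emit four bytes per pixel in B, G, R, A memory order. A scalar sketch of one pixel (function name illustrative):

```c
#include <stdint.h>

/* Scalar model of one pixel of ARGB4444ToARGBRow_MSA (illustrative). */
static void ARGB4444PixelToARGB(uint16_t pixel, uint8_t dst[4]) {
  dst[0] = (uint8_t)((pixel & 0x0F) * 0x11);         /* B */
  dst[1] = (uint8_t)(((pixel >> 4) & 0x0F) * 0x11);  /* G */
  dst[2] = (uint8_t)(((pixel >> 8) & 0x0F) * 0x11);  /* R */
  dst[3] = (uint8_t)((pixel >> 12) * 0x11);          /* A */
}
```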
| 409 | |
| 224 #ifdef __cplusplus | 410 #ifdef __cplusplus |
| 225 } // extern "C" | 411 } // extern "C" |
| 226 } // namespace libyuv | 412 } // namespace libyuv |
| 227 #endif | 413 #endif |
| 228 | 414 |
| 229 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 415 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |