Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(279)

Side by Side Diff: source/libvpx/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <assert.h>
11 #include "./vpx_dsp_rtcd.h" 12 #include "./vpx_dsp_rtcd.h"
12 #include "vpx_dsp/mips/vpx_convolve_msa.h" 13 #include "vpx_dsp/mips/vpx_convolve_msa.h"
13 14
14 static void common_vt_8t_and_aver_dst_4w_msa(const uint8_t *src, 15 static void common_vt_8t_and_aver_dst_4w_msa(const uint8_t *src,
15 int32_t src_stride, 16 int32_t src_stride,
16 uint8_t *dst, 17 uint8_t *dst,
17 int32_t dst_stride, 18 int32_t dst_stride,
18 int8_t *filter, 19 int8_t *filter,
19 int32_t height) { 20 int32_t height) {
20 uint32_t loop_cnt; 21 uint32_t loop_cnt;
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after
276 src += src_stride; 277 src += src_stride;
277 278
278 LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 279 LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
279 ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1); 280 ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
280 dst0 = (v16u8)__msa_ilvr_d((v2i64)dst1, (v2i64)dst0); 281 dst0 = (v16u8)__msa_ilvr_d((v2i64)dst1, (v2i64)dst0);
281 ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, 282 ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
282 src32_r, src43_r); 283 src32_r, src43_r);
283 ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); 284 ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
284 DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1); 285 DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
285 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); 286 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
286 SAT_UH2_UH(tmp0, tmp1, 7);
287 287
288 out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); 288 out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
289 out = __msa_aver_u_b(out, dst0); 289 out = __msa_aver_u_b(out, dst0);
290 290
291 ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); 291 ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
292 } 292 }
293 293
294 static void common_vt_2t_and_aver_dst_4x8_msa(const uint8_t *src, 294 static void common_vt_2t_and_aver_dst_4x8_msa(const uint8_t *src,
295 int32_t src_stride, 295 int32_t src_stride,
296 uint8_t *dst, 296 uint8_t *dst,
(...skipping 19 matching lines...) Expand all
316 ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1); 316 ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
317 ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, 317 ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
318 src32_r, src43_r); 318 src32_r, src43_r);
319 ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r, 319 ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
320 src76_r, src87_r); 320 src76_r, src87_r);
321 ILVR_D4_UB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, 321 ILVR_D4_UB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r,
322 src87_r, src76_r, src2110, src4332, src6554, src8776); 322 src87_r, src76_r, src2110, src4332, src6554, src8776);
323 DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0, 323 DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
324 tmp0, tmp1, tmp2, tmp3); 324 tmp0, tmp1, tmp2, tmp3);
325 SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); 325 SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
326 SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
327 PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src2110, src4332); 326 PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src2110, src4332);
328 AVER_UB2_UB(src2110, dst0, src4332, dst1, src2110, src4332); 327 AVER_UB2_UB(src2110, dst0, src4332, dst1, src2110, src4332);
329 ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); 328 ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
330 dst += (4 * dst_stride); 329 dst += (4 * dst_stride);
331 ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst, dst_stride); 330 ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst, dst_stride);
332 } 331 }
333 332
334 static void common_vt_2t_and_aver_dst_4w_msa(const uint8_t *src, 333 static void common_vt_2t_and_aver_dst_4w_msa(const uint8_t *src,
335 int32_t src_stride, 334 int32_t src_stride,
336 uint8_t *dst, 335 uint8_t *dst,
(...skipping 21 matching lines...) Expand all
358 filt = LD_SH(filter); 357 filt = LD_SH(filter);
359 filt0 = (v16u8)__msa_splati_h(filt, 0); 358 filt0 = (v16u8)__msa_splati_h(filt, 0);
360 359
361 LD_UB5(src, src_stride, src0, src1, src2, src3, src4); 360 LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
362 LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 361 LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
363 ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1); 362 ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1);
364 ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3); 363 ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
365 DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, 364 DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
366 tmp2, tmp3); 365 tmp2, tmp3);
367 SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); 366 SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
368 SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
369 PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3, 367 PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3,
370 dst, dst_stride); 368 dst, dst_stride);
371 } 369 }
372 370
373 static void common_vt_2t_and_aver_dst_8x8mult_msa(const uint8_t *src, 371 static void common_vt_2t_and_aver_dst_8x8mult_msa(const uint8_t *src,
374 int32_t src_stride, 372 int32_t src_stride,
375 uint8_t *dst, 373 uint8_t *dst,
376 int32_t dst_stride, 374 int32_t dst_stride,
377 int8_t *filter, 375 int8_t *filter,
378 int32_t height) { 376 int32_t height) {
(...skipping 16 matching lines...) Expand all
395 src += (8 * src_stride); 393 src += (8 * src_stride);
396 LD_UB8(dst, dst_stride, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8); 394 LD_UB8(dst, dst_stride, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8);
397 395
398 ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, 396 ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1,
399 vec2, vec3); 397 vec2, vec3);
400 ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5, 398 ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5,
401 vec6, vec7); 399 vec6, vec7);
402 DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, 400 DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
403 tmp2, tmp3); 401 tmp2, tmp3);
404 SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); 402 SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
405 SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
406 PCKEV_AVG_ST8x4_UB(tmp0, dst1, tmp1, dst2, tmp2, dst3, tmp3, dst4, 403 PCKEV_AVG_ST8x4_UB(tmp0, dst1, tmp1, dst2, tmp2, dst3, tmp3, dst4,
407 dst, dst_stride); 404 dst, dst_stride);
408 dst += (4 * dst_stride); 405 dst += (4 * dst_stride);
409 406
410 DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, tmp0, tmp1, 407 DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, tmp0, tmp1,
411 tmp2, tmp3); 408 tmp2, tmp3);
412 SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); 409 SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
413 SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
414 PCKEV_AVG_ST8x4_UB(tmp0, dst5, tmp1, dst6, tmp2, dst7, tmp3, dst8, 410 PCKEV_AVG_ST8x4_UB(tmp0, dst5, tmp1, dst6, tmp2, dst7, tmp3, dst8,
415 dst, dst_stride); 411 dst, dst_stride);
416 dst += (4 * dst_stride); 412 dst += (4 * dst_stride);
417 413
418 src0 = src8; 414 src0 = src8;
419 } 415 }
420 } 416 }
421 417
422 static void common_vt_2t_and_aver_dst_8w_msa(const uint8_t *src, 418 static void common_vt_2t_and_aver_dst_8w_msa(const uint8_t *src,
423 int32_t src_stride, 419 int32_t src_stride,
(...skipping 29 matching lines...) Expand all
453 449
454 for (loop_cnt = (height >> 2); loop_cnt--;) { 450 for (loop_cnt = (height >> 2); loop_cnt--;) {
455 LD_UB4(src, src_stride, src1, src2, src3, src4); 451 LD_UB4(src, src_stride, src1, src2, src3, src4);
456 src += (4 * src_stride); 452 src += (4 * src_stride);
457 453
458 LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 454 LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
459 ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); 455 ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
460 ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); 456 ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
461 DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); 457 DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
462 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); 458 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
463 SAT_UH2_UH(tmp0, tmp1, 7);
464 PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); 459 PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst);
465 dst += dst_stride; 460 dst += dst_stride;
466 461
467 ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); 462 ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
468 ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); 463 ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
469 DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); 464 DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
470 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); 465 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
471 SAT_UH2_UH(tmp2, tmp3, 7);
472 PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst); 466 PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst);
473 dst += dst_stride; 467 dst += dst_stride;
474 468
475 DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); 469 DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
476 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); 470 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
477 SAT_UH2_UH(tmp0, tmp1, 7);
478 PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst); 471 PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst);
479 dst += dst_stride; 472 dst += dst_stride;
480 473
481 DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); 474 DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
482 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); 475 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
483 SAT_UH2_UH(tmp2, tmp3, 7);
484 PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst); 476 PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst);
485 dst += dst_stride; 477 dst += dst_stride;
486 478
487 src0 = src4; 479 src0 = src4;
488 } 480 }
489 } 481 }
490 482
491 static void common_vt_2t_and_aver_dst_32w_msa(const uint8_t *src, 483 static void common_vt_2t_and_aver_dst_32w_msa(const uint8_t *src,
492 int32_t src_stride, 484 int32_t src_stride,
493 uint8_t *dst, 485 uint8_t *dst,
(...skipping 18 matching lines...) Expand all
512 LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 504 LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
513 ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); 505 ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
514 ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); 506 ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
515 507
516 LD_UB4(src + 16, src_stride, src6, src7, src8, src9); 508 LD_UB4(src + 16, src_stride, src6, src7, src8, src9);
517 LD_UB4(dst + 16, dst_stride, dst4, dst5, dst6, dst7); 509 LD_UB4(dst + 16, dst_stride, dst4, dst5, dst6, dst7);
518 src += (4 * src_stride); 510 src += (4 * src_stride);
519 511
520 DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); 512 DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
521 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); 513 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
522 SAT_UH2_UH(tmp0, tmp1, 7);
523 PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); 514 PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst);
524 515
525 DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); 516 DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
526 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); 517 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
527 SAT_UH2_UH(tmp2, tmp3, 7);
528 PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride); 518 PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride);
529 519
530 ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); 520 ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
531 ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); 521 ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
532 DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); 522 DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
533 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); 523 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
534 SAT_UH2_UH(tmp0, tmp1, 7);
535 PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst + 2 * dst_stride); 524 PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst + 2 * dst_stride);
536 525
537 DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); 526 DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
538 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); 527 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
539 SAT_UH2_UH(tmp2, tmp3, 7);
540 PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst + 3 * dst_stride); 528 PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst + 3 * dst_stride);
541 529
542 ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2); 530 ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2);
543 ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3); 531 ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3);
544 DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); 532 DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
545 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); 533 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
546 SAT_UH2_UH(tmp0, tmp1, 7);
547 PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 16); 534 PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 16);
548 535
549 DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); 536 DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
550 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); 537 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
551 SAT_UH2_UH(tmp2, tmp3, 7);
552 PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 16 + dst_stride); 538 PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 16 + dst_stride);
553 539
554 ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6); 540 ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6);
555 ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7); 541 ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7);
556 DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); 542 DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
557 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); 543 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
558 SAT_UH2_UH(tmp0, tmp1, 7);
559 PCKEV_AVG_ST_UB(tmp1, tmp0, dst6, dst + 16 + 2 * dst_stride); 544 PCKEV_AVG_ST_UB(tmp1, tmp0, dst6, dst + 16 + 2 * dst_stride);
560 545
561 DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); 546 DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
562 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); 547 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
563 SAT_UH2_UH(tmp2, tmp3, 7);
564 PCKEV_AVG_ST_UB(tmp3, tmp2, dst7, dst + 16 + 3 * dst_stride); 548 PCKEV_AVG_ST_UB(tmp3, tmp2, dst7, dst + 16 + 3 * dst_stride);
565 dst += (4 * dst_stride); 549 dst += (4 * dst_stride);
566 550
567 src0 = src4; 551 src0 = src4;
568 src5 = src9; 552 src5 = src9;
569 } 553 }
570 } 554 }
571 555
572 static void common_vt_2t_and_aver_dst_64w_msa(const uint8_t *src, 556 static void common_vt_2t_and_aver_dst_64w_msa(const uint8_t *src,
573 int32_t src_stride, 557 int32_t src_stride,
(...skipping 24 matching lines...) Expand all
598 LD_UB2(src + 32, src_stride, src7, src8); 582 LD_UB2(src + 32, src_stride, src7, src8);
599 LD_UB2(dst + 32, dst_stride, dst4, dst5); 583 LD_UB2(dst + 32, dst_stride, dst4, dst5);
600 LD_UB2(src + 48, src_stride, src10, src11); 584 LD_UB2(src + 48, src_stride, src10, src11);
601 LD_UB2(dst + 48, dst_stride, dst6, dst7); 585 LD_UB2(dst + 48, dst_stride, dst6, dst7);
602 src += (2 * src_stride); 586 src += (2 * src_stride);
603 587
604 ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); 588 ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
605 ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); 589 ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
606 DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); 590 DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
607 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); 591 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
608 SAT_UH2_UH(tmp0, tmp1, 7);
609 PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); 592 PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst);
610 593
611 DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); 594 DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
612 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); 595 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
613 SAT_UH2_UH(tmp2, tmp3, 7);
614 PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride); 596 PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride);
615 597
616 ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6); 598 ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6);
617 ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7); 599 ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7);
618 DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5); 600 DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
619 SRARI_H2_UH(tmp4, tmp5, FILTER_BITS); 601 SRARI_H2_UH(tmp4, tmp5, FILTER_BITS);
620 SAT_UH2_UH(tmp4, tmp5, 7);
621 PCKEV_AVG_ST_UB(tmp5, tmp4, dst2, dst + 16); 602 PCKEV_AVG_ST_UB(tmp5, tmp4, dst2, dst + 16);
622 603
623 DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7); 604 DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
624 SRARI_H2_UH(tmp6, tmp7, FILTER_BITS); 605 SRARI_H2_UH(tmp6, tmp7, FILTER_BITS);
625 SAT_UH2_UH(tmp6, tmp7, 7);
626 PCKEV_AVG_ST_UB(tmp7, tmp6, dst3, dst + 16 + dst_stride); 606 PCKEV_AVG_ST_UB(tmp7, tmp6, dst3, dst + 16 + dst_stride);
627 607
628 ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2); 608 ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2);
629 ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3); 609 ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3);
630 DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); 610 DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
631 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); 611 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
632 SAT_UH2_UH(tmp0, tmp1, 7);
633 PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 32); 612 PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 32);
634 613
635 DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); 614 DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
636 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); 615 SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
637 SAT_UH2_UH(tmp2, tmp3, 7);
638 PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 32 + dst_stride); 616 PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 32 + dst_stride);
639 617
640 ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6); 618 ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6);
641 ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7); 619 ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7);
642 DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5); 620 DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
643 SRARI_H2_UH(tmp4, tmp5, FILTER_BITS); 621 SRARI_H2_UH(tmp4, tmp5, FILTER_BITS);
644 SAT_UH2_UH(tmp4, tmp5, 7);
645 PCKEV_AVG_ST_UB(tmp5, tmp4, dst6, (dst + 48)); 622 PCKEV_AVG_ST_UB(tmp5, tmp4, dst6, (dst + 48));
646 623
647 DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7); 624 DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
648 SRARI_H2_UH(tmp6, tmp7, FILTER_BITS); 625 SRARI_H2_UH(tmp6, tmp7, FILTER_BITS);
649 SAT_UH2_UH(tmp6, tmp7, 7);
650 PCKEV_AVG_ST_UB(tmp7, tmp6, dst7, dst + 48 + dst_stride); 626 PCKEV_AVG_ST_UB(tmp7, tmp6, dst7, dst + 48 + dst_stride);
651 dst += (2 * dst_stride); 627 dst += (2 * dst_stride);
652 628
653 src0 = src2; 629 src0 = src2;
654 src3 = src5; 630 src3 = src5;
655 src6 = src8; 631 src6 = src8;
656 src9 = src11; 632 src9 = src11;
657 } 633 }
658 } 634 }
659 635
660 void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride, 636 void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
661 uint8_t *dst, ptrdiff_t dst_stride, 637 uint8_t *dst, ptrdiff_t dst_stride,
662 const int16_t *filter_x, int x_step_q4, 638 const int16_t *filter_x, int x_step_q4,
663 const int16_t *filter_y, int y_step_q4, 639 const int16_t *filter_y, int y_step_q4,
664 int w, int h) { 640 int w, int h) {
665 int8_t cnt, filt_ver[8]; 641 int8_t cnt, filt_ver[8];
666 642
667 if (16 != y_step_q4) { 643 assert(y_step_q4 == 16);
668 vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, 644 assert(((const int32_t *)filter_y)[1] != 0x800000);
669 filter_x, x_step_q4, filter_y, y_step_q4,
670 w, h);
671 return;
672 }
673
674 if (((const int32_t *)filter_y)[1] == 0x800000) {
675 vpx_convolve_avg(src, src_stride, dst, dst_stride,
676 filter_x, x_step_q4, filter_y, y_step_q4,
677 w, h);
678 return;
679 }
680 645
681 for (cnt = 0; cnt < 8; ++cnt) { 646 for (cnt = 0; cnt < 8; ++cnt) {
682 filt_ver[cnt] = filter_y[cnt]; 647 filt_ver[cnt] = filter_y[cnt];
683 } 648 }
684 649
685 if (((const int32_t *)filter_y)[0] == 0) { 650 if (((const int32_t *)filter_y)[0] == 0) {
686 switch (w) { 651 switch (w) {
687 case 4: 652 case 4:
688 common_vt_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, 653 common_vt_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride,
689 dst, (int32_t)dst_stride, 654 dst, (int32_t)dst_stride,
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
744 filt_ver, h); 709 filt_ver, h);
745 break; 710 break;
746 default: 711 default:
747 vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, 712 vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
748 filter_x, x_step_q4, filter_y, y_step_q4, 713 filter_x, x_step_q4, filter_y, y_step_q4,
749 w, h); 714 w, h);
750 break; 715 break;
751 } 716 }
752 } 717 }
753 } 718 }
OLDNEW
« no previous file with comments | « source/libvpx/vpx_dsp/mips/vpx_convolve8_avg_msa.c ('k') | source/libvpx/vpx_dsp/mips/vpx_convolve8_dspr2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698