Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(57)

Side by Side Diff: source/libvpx/vpx_dsp/mips/itrans8_dspr2.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/libvpx/vpx_dsp/mips/itrans4_dspr2.c ('k') | source/libvpx/vpx_dsp/mips/macros_msa.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <assert.h>
12 #include <stdio.h>
13
14 #include "./vpx_config.h" 11 #include "./vpx_config.h"
15 #include "./vp9_rtcd.h" 12 #include "./vpx_dsp_rtcd.h"
16 #include "vp9/common/vp9_common.h" 13 #include "vpx_dsp/mips/inv_txfm_dspr2.h"
17 #include "vp9/common/vp9_blockd.h"
18 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
19 #include "vpx_dsp/txfm_common.h" 14 #include "vpx_dsp/txfm_common.h"
20 #include "vpx_ports/mem.h"
21 15
22 #if HAVE_DSPR2 16 #if HAVE_DSPR2
23 static void idct8_rows_dspr2(const int16_t *input, int16_t *output, 17 void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows) {
24 uint32_t no_rows) {
25 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; 18 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
26 const int const_2_power_13 = 8192; 19 const int const_2_power_13 = 8192;
27 int Temp0, Temp1, Temp2, Temp3, Temp4; 20 int Temp0, Temp1, Temp2, Temp3, Temp4;
28 int i; 21 int i;
29 22
30 for (i = no_rows; i--; ) { 23 for (i = no_rows; i--; ) {
31 __asm__ __volatile__ ( 24 __asm__ __volatile__ (
32 /* 25 /*
33 temp_1 = (input[0] + input[4]) * cospi_16_64; 26 temp_1 = (input[0] + input[4]) * cospi_16_64;
34 step2_0 = dct_const_round_shift(temp_1); 27 step2_0 = dct_const_round_shift(temp_1);
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 [cospi_20_64] "r" (cospi_20_64), [cospi_8_64] "r" (cospi_8_64), 187 [cospi_20_64] "r" (cospi_20_64), [cospi_8_64] "r" (cospi_8_64),
195 [cospi_24_64] "r" (cospi_24_64), 188 [cospi_24_64] "r" (cospi_24_64),
196 [output] "r" (output), [input] "r" (input) 189 [output] "r" (output), [input] "r" (input)
197 ); 190 );
198 191
199 input += 8; 192 input += 8;
200 output += 1; 193 output += 1;
201 } 194 }
202 } 195 }
203 196
204 static void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, 197 void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
205 int dest_stride) { 198 int dest_stride) {
206 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; 199 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
207 int Temp0, Temp1, Temp2, Temp3; 200 int Temp0, Temp1, Temp2, Temp3;
208 int i; 201 int i;
209 const int const_2_power_13 = 8192; 202 const int const_2_power_13 = 8192;
210 uint8_t *dest_pix; 203 uint8_t *dest_pix;
211 uint8_t *cm = vpx_ff_cropTbl; 204 uint8_t *cm = vpx_ff_cropTbl;
212 205
213 /* prefetch vpx_ff_cropTbl */ 206 /* prefetch vpx_ff_cropTbl */
214 prefetch_load(vpx_ff_cropTbl); 207 prefetch_load(vpx_ff_cropTbl);
215 prefetch_load(vpx_ff_cropTbl + 32); 208 prefetch_load(vpx_ff_cropTbl + 32);
(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after
442 [cospi_4_64] "r" (cospi_4_64), [cospi_12_64] "r" (cospi_12_64), 435 [cospi_4_64] "r" (cospi_4_64), [cospi_12_64] "r" (cospi_12_64),
443 [cospi_20_64] "r" (cospi_20_64), [cospi_8_64] "r" (cospi_8_64), 436 [cospi_20_64] "r" (cospi_20_64), [cospi_8_64] "r" (cospi_8_64),
444 [cospi_24_64] "r" (cospi_24_64), 437 [cospi_24_64] "r" (cospi_24_64),
445 [input] "r" (input), [cm] "r" (cm), [dest_stride] "r" (dest_stride) 438 [input] "r" (input), [cm] "r" (cm), [dest_stride] "r" (dest_stride)
446 ); 439 );
447 440
448 input += 8; 441 input += 8;
449 } 442 }
450 } 443 }
451 444
452 void vp9_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, 445 void vpx_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
453 int dest_stride) { 446 int dest_stride) {
454 DECLARE_ALIGNED(32, int16_t, out[8 * 8]); 447 DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
455 int16_t *outptr = out; 448 int16_t *outptr = out;
456 uint32_t pos = 45; 449 uint32_t pos = 45;
457 450
458 /* bit position for extract from acc */ 451 /* bit position for extract from acc */
459 __asm__ __volatile__ ( 452 __asm__ __volatile__ (
460 "wrdsp %[pos], 1 \n\t" 453 "wrdsp %[pos], 1 \n\t"
461 : 454 :
462 : [pos] "r" (pos) 455 : [pos] "r" (pos)
463 ); 456 );
464 457
465 // First transform rows 458 // First transform rows
466 idct8_rows_dspr2(input, outptr, 8); 459 idct8_rows_dspr2(input, outptr, 8);
467 460
468 // Then transform columns and add to dest 461 // Then transform columns and add to dest
469 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); 462 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
470 } 463 }
471 464
472 static void iadst8_dspr2(const int16_t *input, int16_t *output) { 465 void vpx_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,
473 int s0, s1, s2, s3, s4, s5, s6, s7;
474 int x0, x1, x2, x3, x4, x5, x6, x7;
475
476 x0 = input[7];
477 x1 = input[0];
478 x2 = input[5];
479 x3 = input[2];
480 x4 = input[3];
481 x5 = input[4];
482 x6 = input[1];
483 x7 = input[6];
484
485 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
486 output[0] = output[1] = output[2] = output[3] = output[4]
487 = output[5] = output[6] = output[7] = 0;
488 return;
489 }
490
491 // stage 1
492 s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
493 s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
494 s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
495 s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
496 s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
497 s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
498 s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
499 s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
500
501 x0 = ROUND_POWER_OF_TWO((s0 + s4), DCT_CONST_BITS);
502 x1 = ROUND_POWER_OF_TWO((s1 + s5), DCT_CONST_BITS);
503 x2 = ROUND_POWER_OF_TWO((s2 + s6), DCT_CONST_BITS);
504 x3 = ROUND_POWER_OF_TWO((s3 + s7), DCT_CONST_BITS);
505 x4 = ROUND_POWER_OF_TWO((s0 - s4), DCT_CONST_BITS);
506 x5 = ROUND_POWER_OF_TWO((s1 - s5), DCT_CONST_BITS);
507 x6 = ROUND_POWER_OF_TWO((s2 - s6), DCT_CONST_BITS);
508 x7 = ROUND_POWER_OF_TWO((s3 - s7), DCT_CONST_BITS);
509
510 // stage 2
511 s0 = x0;
512 s1 = x1;
513 s2 = x2;
514 s3 = x3;
515 s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
516 s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
517 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
518 s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
519
520 x0 = s0 + s2;
521 x1 = s1 + s3;
522 x2 = s0 - s2;
523 x3 = s1 - s3;
524 x4 = ROUND_POWER_OF_TWO((s4 + s6), DCT_CONST_BITS);
525 x5 = ROUND_POWER_OF_TWO((s5 + s7), DCT_CONST_BITS);
526 x6 = ROUND_POWER_OF_TWO((s4 - s6), DCT_CONST_BITS);
527 x7 = ROUND_POWER_OF_TWO((s5 - s7), DCT_CONST_BITS);
528
529 // stage 3
530 s2 = cospi_16_64 * (x2 + x3);
531 s3 = cospi_16_64 * (x2 - x3);
532 s6 = cospi_16_64 * (x6 + x7);
533 s7 = cospi_16_64 * (x6 - x7);
534
535 x2 = ROUND_POWER_OF_TWO((s2), DCT_CONST_BITS);
536 x3 = ROUND_POWER_OF_TWO((s3), DCT_CONST_BITS);
537 x6 = ROUND_POWER_OF_TWO((s6), DCT_CONST_BITS);
538 x7 = ROUND_POWER_OF_TWO((s7), DCT_CONST_BITS);
539
540 output[0] = x0;
541 output[1] = -x4;
542 output[2] = x6;
543 output[3] = -x2;
544 output[4] = x3;
545 output[5] = -x7;
546 output[6] = x5;
547 output[7] = -x1;
548 }
549
550 void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
551 int dest_stride, int tx_type) {
552 int i, j;
553 DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
554 int16_t *outptr = out;
555 int16_t temp_in[8 * 8], temp_out[8];
556 uint32_t pos = 45;
557
558 /* bit position for extract from acc */
559 __asm__ __volatile__ (
560 "wrdsp %[pos], 1 \n\t"
561 :
562 : [pos] "r" (pos)
563 );
564
565 switch (tx_type) {
566 case DCT_DCT: // DCT in both horizontal and vertical
567 idct8_rows_dspr2(input, outptr, 8);
568 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
569 break;
570 case ADST_DCT: // ADST in vertical, DCT in horizontal
571 idct8_rows_dspr2(input, outptr, 8);
572
573 for (i = 0; i < 8; ++i) {
574 iadst8_dspr2(&out[i * 8], temp_out);
575
576 for (j = 0; j < 8; ++j)
577 dest[j * dest_stride + i] =
578 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
579 + dest[j * dest_stride + i]);
580 }
581 break;
582 case DCT_ADST: // DCT in vertical, ADST in horizontal
583 for (i = 0; i < 8; ++i) {
584 iadst8_dspr2(input, outptr);
585 input += 8;
586 outptr += 8;
587 }
588
589 for (i = 0; i < 8; ++i) {
590 for (j = 0; j < 8; ++j) {
591 temp_in[i * 8 + j] = out[j * 8 + i];
592 }
593 }
594 idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
595 break;
596 case ADST_ADST: // ADST in both directions
597 for (i = 0; i < 8; ++i) {
598 iadst8_dspr2(input, outptr);
599 input += 8;
600 outptr += 8;
601 }
602
603 for (i = 0; i < 8; ++i) {
604 for (j = 0; j < 8; ++j)
605 temp_in[j] = out[j * 8 + i];
606
607 iadst8_dspr2(temp_in, temp_out);
608
609 for (j = 0; j < 8; ++j)
610 dest[j * dest_stride + i] =
611 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
612 + dest[j * dest_stride + i]);
613 }
614 break;
615 default:
616 printf("vp9_short_iht8x8_add_dspr2 : Invalid tx_type\n");
617 break;
618 }
619 }
620
621 void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,
622 int dest_stride) { 466 int dest_stride) {
623 DECLARE_ALIGNED(32, int16_t, out[8 * 8]); 467 DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
624 int16_t *outptr = out; 468 int16_t *outptr = out;
625 uint32_t pos = 45; 469 uint32_t pos = 45;
626 470
627 /* bit position for extract from acc */ 471 /* bit position for extract from acc */
628 __asm__ __volatile__ ( 472 __asm__ __volatile__ (
629 "wrdsp %[pos], 1 \n\t" 473 "wrdsp %[pos], 1 \n\t"
630 : 474 :
631 : [pos] "r" (pos) 475 : [pos] "r" (pos)
(...skipping 24 matching lines...) Expand all
656 500
657 : 501 :
658 : [outptr] "r" (outptr) 502 : [outptr] "r" (outptr)
659 ); 503 );
660 504
661 505
662 // Then transform columns and add to dest 506 // Then transform columns and add to dest
663 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); 507 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
664 } 508 }
665 509
666 void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest, 510 void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,
667 int dest_stride) { 511 int dest_stride) {
668 uint32_t pos = 45; 512 uint32_t pos = 45;
669 int32_t out; 513 int32_t out;
670 int32_t r; 514 int32_t r;
671 int32_t a1, absa1; 515 int32_t a1, absa1;
672 int32_t t1, t2, vector_a1, vector_1, vector_2; 516 int32_t t1, t2, vector_a1, vector_1, vector_2;
673 517
674 /* bit position for extract from acc */ 518 /* bit position for extract from acc */
675 __asm__ __volatile__ ( 519 __asm__ __volatile__ (
676 "wrdsp %[pos], 1 \n\t" 520 "wrdsp %[pos], 1 \n\t"
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
736 "add %[dest], %[dest], %[dest_stride] \n\t" 580 "add %[dest], %[dest], %[dest_stride] \n\t"
737 581
738 : [t1] "=&r" (t1), [t2] "=&r" (t2), 582 : [t1] "=&r" (t1), [t2] "=&r" (t2),
739 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2), 583 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2),
740 [dest] "+r" (dest) 584 [dest] "+r" (dest)
741 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) 585 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)
742 ); 586 );
743 } 587 }
744 } 588 }
745 } 589 }
746 #endif // #if HAVE_DSPR2 590
591 void iadst8_dspr2(const int16_t *input, int16_t *output) {
592 int s0, s1, s2, s3, s4, s5, s6, s7;
593 int x0, x1, x2, x3, x4, x5, x6, x7;
594
595 x0 = input[7];
596 x1 = input[0];
597 x2 = input[5];
598 x3 = input[2];
599 x4 = input[3];
600 x5 = input[4];
601 x6 = input[1];
602 x7 = input[6];
603
604 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
605 output[0] = output[1] = output[2] = output[3] = output[4]
606 = output[5] = output[6] = output[7] = 0;
607 return;
608 }
609
610 // stage 1
611 s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
612 s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
613 s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
614 s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
615 s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
616 s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
617 s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
618 s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
619
620 x0 = ROUND_POWER_OF_TWO((s0 + s4), DCT_CONST_BITS);
621 x1 = ROUND_POWER_OF_TWO((s1 + s5), DCT_CONST_BITS);
622 x2 = ROUND_POWER_OF_TWO((s2 + s6), DCT_CONST_BITS);
623 x3 = ROUND_POWER_OF_TWO((s3 + s7), DCT_CONST_BITS);
624 x4 = ROUND_POWER_OF_TWO((s0 - s4), DCT_CONST_BITS);
625 x5 = ROUND_POWER_OF_TWO((s1 - s5), DCT_CONST_BITS);
626 x6 = ROUND_POWER_OF_TWO((s2 - s6), DCT_CONST_BITS);
627 x7 = ROUND_POWER_OF_TWO((s3 - s7), DCT_CONST_BITS);
628
629 // stage 2
630 s0 = x0;
631 s1 = x1;
632 s2 = x2;
633 s3 = x3;
634 s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
635 s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
636 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
637 s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
638
639 x0 = s0 + s2;
640 x1 = s1 + s3;
641 x2 = s0 - s2;
642 x3 = s1 - s3;
643 x4 = ROUND_POWER_OF_TWO((s4 + s6), DCT_CONST_BITS);
644 x5 = ROUND_POWER_OF_TWO((s5 + s7), DCT_CONST_BITS);
645 x6 = ROUND_POWER_OF_TWO((s4 - s6), DCT_CONST_BITS);
646 x7 = ROUND_POWER_OF_TWO((s5 - s7), DCT_CONST_BITS);
647
648 // stage 3
649 s2 = cospi_16_64 * (x2 + x3);
650 s3 = cospi_16_64 * (x2 - x3);
651 s6 = cospi_16_64 * (x6 + x7);
652 s7 = cospi_16_64 * (x6 - x7);
653
654 x2 = ROUND_POWER_OF_TWO((s2), DCT_CONST_BITS);
655 x3 = ROUND_POWER_OF_TWO((s3), DCT_CONST_BITS);
656 x6 = ROUND_POWER_OF_TWO((s6), DCT_CONST_BITS);
657 x7 = ROUND_POWER_OF_TWO((s7), DCT_CONST_BITS);
658
659 output[0] = x0;
660 output[1] = -x4;
661 output[2] = x6;
662 output[3] = -x2;
663 output[4] = x3;
664 output[5] = -x7;
665 output[6] = x5;
666 output[7] = -x1;
667 }
668 #endif // HAVE_DSPR2
OLDNEW
« no previous file with comments | « source/libvpx/vpx_dsp/mips/itrans4_dspr2.c ('k') | source/libvpx/vpx_dsp/mips/macros_msa.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698