source/libvpx/vpx_dsp/mips/itrans8_dspr2.c - Issue 1302353004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vpx_dsp/mips/itrans8_dspr2.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include <assert.h>

12 #include <stdio.h>

13

14 #include "./vpx_config.h"	11 #include "./vpx_config.h"

15 #include "./vp9_rtcd.h"	12 #include "./vpx_dsp_rtcd.h"

16 #include "vp9/common/vp9_common.h"	13 #include "vpx_dsp/mips/inv_txfm_dspr2.h"

17 #include "vp9/common/vp9_blockd.h"

18 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h"

19 #include "vpx_dsp/txfm_common.h"	14 #include "vpx_dsp/txfm_common.h"

20 #include "vpx_ports/mem.h"

21	15

22 #if HAVE_DSPR2	16 #if HAVE_DSPR2

23 static void idct8_rows_dspr2(const int16_t *input, int16_t *output,	17 void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows) {

24 uint32_t no_rows) {

25 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;	18 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;

26 const int const_2_power_13 = 8192;	19 const int const_2_power_13 = 8192;

27 int Temp0, Temp1, Temp2, Temp3, Temp4;	20 int Temp0, Temp1, Temp2, Temp3, Temp4;

28 int i;	21 int i;

29	22

30 for (i = no_rows; i--; ) {	23 for (i = no_rows; i--; ) {

31 __asm__ __volatile__ (	24 __asm__ __volatile__ (

32 /*	25 /*

33 temp_1 = (input[0] + input[4]) * cospi_16_64;	26 temp_1 = (input[0] + input[4]) * cospi_16_64;

34 step2_0 = dct_const_round_shift(temp_1);	27 step2_0 = dct_const_round_shift(temp_1);

(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
194 [cospi_20_64] "r" (cospi_20_64), [cospi_8_64] "r" (cospi_8_64),	187 [cospi_20_64] "r" (cospi_20_64), [cospi_8_64] "r" (cospi_8_64),

195 [cospi_24_64] "r" (cospi_24_64),	188 [cospi_24_64] "r" (cospi_24_64),

196 [output] "r" (output), [input] "r" (input)	189 [output] "r" (output), [input] "r" (input)

197 );	190 );

198	191

199 input += 8;	192 input += 8;

200 output += 1;	193 output += 1;

201 }	194 }

202 }	195 }

203	196

204 static void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,	197 void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,

205 int dest_stride) {	198 int dest_stride) {

206 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;	199 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;

207 int Temp0, Temp1, Temp2, Temp3;	200 int Temp0, Temp1, Temp2, Temp3;

208 int i;	201 int i;

209 const int const_2_power_13 = 8192;	202 const int const_2_power_13 = 8192;

210 uint8_t *dest_pix;	203 uint8_t *dest_pix;

211 uint8_t *cm = vpx_ff_cropTbl;	204 uint8_t *cm = vpx_ff_cropTbl;

212	205

213 /* prefetch vpx_ff_cropTbl */	206 /* prefetch vpx_ff_cropTbl */

214 prefetch_load(vpx_ff_cropTbl);	207 prefetch_load(vpx_ff_cropTbl);

215 prefetch_load(vpx_ff_cropTbl + 32);	208 prefetch_load(vpx_ff_cropTbl + 32);

(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
442 [cospi_4_64] "r" (cospi_4_64), [cospi_12_64] "r" (cospi_12_64),	435 [cospi_4_64] "r" (cospi_4_64), [cospi_12_64] "r" (cospi_12_64),

443 [cospi_20_64] "r" (cospi_20_64), [cospi_8_64] "r" (cospi_8_64),	436 [cospi_20_64] "r" (cospi_20_64), [cospi_8_64] "r" (cospi_8_64),

444 [cospi_24_64] "r" (cospi_24_64),	437 [cospi_24_64] "r" (cospi_24_64),

445 [input] "r" (input), [cm] "r" (cm), [dest_stride] "r" (dest_stride)	438 [input] "r" (input), [cm] "r" (cm), [dest_stride] "r" (dest_stride)

446 );	439 );

447	440

448 input += 8;	441 input += 8;

449 }	442 }

450 }	443 }

451	444

452 void vp9_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,	445 void vpx_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,

453 int dest_stride) {	446 int dest_stride) {

454 DECLARE_ALIGNED(32, int16_t, out[8 * 8]);	447 DECLARE_ALIGNED(32, int16_t, out[8 * 8]);

455 int16_t *outptr = out;	448 int16_t *outptr = out;

456 uint32_t pos = 45;	449 uint32_t pos = 45;

457	450

458 /* bit positon for extract from acc */	451 /* bit positon for extract from acc */

459 __asm__ __volatile__ (	452 __asm__ __volatile__ (

460 "wrdsp %[pos], 1 \n\t"	453 "wrdsp %[pos], 1 \n\t"

461 :	454 :

462 : [pos] "r" (pos)	455 : [pos] "r" (pos)

463 );	456 );

464	457

465 // First transform rows	458 // First transform rows

466 idct8_rows_dspr2(input, outptr, 8);	459 idct8_rows_dspr2(input, outptr, 8);

467	460

468 // Then transform columns and add to dest	461 // Then transform columns and add to dest

469 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);	462 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);

470 }	463 }

471	464

472 static void iadst8_dspr2(const int16_t *input, int16_t *output) {	465 void vpx_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,

473 int s0, s1, s2, s3, s4, s5, s6, s7;

474 int x0, x1, x2, x3, x4, x5, x6, x7;

475

476 x0 = input[7];

477 x1 = input[0];

478 x2 = input[5];

479 x3 = input[2];

480 x4 = input[3];

481 x5 = input[4];

482 x6 = input[1];

483 x7 = input[6];

484

485 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {

486 output[0] = output[1] = output[2] = output[3] = output[4]

487 = output[5] = output[6] = output[7] = 0;

488 return;

489 }

490

491 // stage 1

492 s0 = cospi_2_64 * x0 + cospi_30_64 * x1;

493 s1 = cospi_30_64 * x0 - cospi_2_64 * x1;

494 s2 = cospi_10_64 * x2 + cospi_22_64 * x3;

495 s3 = cospi_22_64 * x2 - cospi_10_64 * x3;

496 s4 = cospi_18_64 * x4 + cospi_14_64 * x5;

497 s5 = cospi_14_64 * x4 - cospi_18_64 * x5;

498 s6 = cospi_26_64 * x6 + cospi_6_64 * x7;

499 s7 = cospi_6_64 * x6 - cospi_26_64 * x7;

500

501 x0 = ROUND_POWER_OF_TWO((s0 + s4), DCT_CONST_BITS);

502 x1 = ROUND_POWER_OF_TWO((s1 + s5), DCT_CONST_BITS);

503 x2 = ROUND_POWER_OF_TWO((s2 + s6), DCT_CONST_BITS);

504 x3 = ROUND_POWER_OF_TWO((s3 + s7), DCT_CONST_BITS);

505 x4 = ROUND_POWER_OF_TWO((s0 - s4), DCT_CONST_BITS);

506 x5 = ROUND_POWER_OF_TWO((s1 - s5), DCT_CONST_BITS);

507 x6 = ROUND_POWER_OF_TWO((s2 - s6), DCT_CONST_BITS);

508 x7 = ROUND_POWER_OF_TWO((s3 - s7), DCT_CONST_BITS);

509

510 // stage 2

511 s0 = x0;

512 s1 = x1;

513 s2 = x2;

514 s3 = x3;

515 s4 = cospi_8_64 * x4 + cospi_24_64 * x5;

516 s5 = cospi_24_64 * x4 - cospi_8_64 * x5;

517 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;

518 s7 = cospi_8_64 * x6 + cospi_24_64 * x7;

519

520 x0 = s0 + s2;

521 x1 = s1 + s3;

522 x2 = s0 - s2;

523 x3 = s1 - s3;

524 x4 = ROUND_POWER_OF_TWO((s4 + s6), DCT_CONST_BITS);

525 x5 = ROUND_POWER_OF_TWO((s5 + s7), DCT_CONST_BITS);

526 x6 = ROUND_POWER_OF_TWO((s4 - s6), DCT_CONST_BITS);

527 x7 = ROUND_POWER_OF_TWO((s5 - s7), DCT_CONST_BITS);

528

529 // stage 3

530 s2 = cospi_16_64 * (x2 + x3);

531 s3 = cospi_16_64 * (x2 - x3);

532 s6 = cospi_16_64 * (x6 + x7);

533 s7 = cospi_16_64 * (x6 - x7);

534

535 x2 = ROUND_POWER_OF_TWO((s2), DCT_CONST_BITS);

536 x3 = ROUND_POWER_OF_TWO((s3), DCT_CONST_BITS);

537 x6 = ROUND_POWER_OF_TWO((s6), DCT_CONST_BITS);

538 x7 = ROUND_POWER_OF_TWO((s7), DCT_CONST_BITS);

539

540 output[0] = x0;

541 output[1] = -x4;

542 output[2] = x6;

543 output[3] = -x2;

544 output[4] = x3;

545 output[5] = -x7;

546 output[6] = x5;

547 output[7] = -x1;

548 }

549

550 void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,

551 int dest_stride, int tx_type) {

552 int i, j;

553 DECLARE_ALIGNED(32, int16_t, out[8 * 8]);

554 int16_t *outptr = out;

555 int16_t temp_in[8 * 8], temp_out[8];

556 uint32_t pos = 45;

557

558 /* bit positon for extract from acc */

559 __asm__ __volatile__ (

560 "wrdsp %[pos], 1 \n\t"

561 :

562 : [pos] "r" (pos)

563 );

564

565 switch (tx_type) {

566 case DCT_DCT: // DCT in both horizontal and vertical

567 idct8_rows_dspr2(input, outptr, 8);

568 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);

569 break;

570 case ADST_DCT: // ADST in vertical, DCT in horizontal

571 idct8_rows_dspr2(input, outptr, 8);

572

573 for (i = 0; i < 8; ++i) {

574 iadst8_dspr2(&out[i * 8], temp_out);

575

576 for (j = 0; j < 8; ++j)

577 dest[j * dest_stride + i] =

578 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)

579 + dest[j * dest_stride + i]);

580 }

581 break;

582 case DCT_ADST: // DCT in vertical, ADST in horizontal

583 for (i = 0; i < 8; ++i) {

584 iadst8_dspr2(input, outptr);

585 input += 8;

586 outptr += 8;

587 }

588

589 for (i = 0; i < 8; ++i) {

590 for (j = 0; j < 8; ++j) {

591 temp_in[i * 8 + j] = out[j * 8 + i];

592 }

593 }

594 idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);

595 break;

596 case ADST_ADST: // ADST in both directions

597 for (i = 0; i < 8; ++i) {

598 iadst8_dspr2(input, outptr);

599 input += 8;

600 outptr += 8;

601 }

602

603 for (i = 0; i < 8; ++i) {

604 for (j = 0; j < 8; ++j)

605 temp_in[j] = out[j * 8 + i];

606

607 iadst8_dspr2(temp_in, temp_out);

608

609 for (j = 0; j < 8; ++j)

610 dest[j * dest_stride + i] =

611 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)

612 + dest[j * dest_stride + i]);

613 }

614 break;

615 default:

616 printf("vp9_short_iht8x8_add_dspr2 : Invalid tx_type\n");

617 break;

618 }

619 }

620

621 void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,

622 int dest_stride) {	466 int dest_stride) {

623 DECLARE_ALIGNED(32, int16_t, out[8 * 8]);	467 DECLARE_ALIGNED(32, int16_t, out[8 * 8]);

624 int16_t *outptr = out;	468 int16_t *outptr = out;

625 uint32_t pos = 45;	469 uint32_t pos = 45;

626	470

627 /* bit positon for extract from acc */	471 /* bit positon for extract from acc */

628 __asm__ __volatile__ (	472 __asm__ __volatile__ (

629 "wrdsp %[pos], 1 \n\t"	473 "wrdsp %[pos], 1 \n\t"

630 :	474 :

631 : [pos] "r" (pos)	475 : [pos] "r" (pos)

(...skipping 24 matching lines...) Expand all Loading...
656	500

657 :	501 :

658 : [outptr] "r" (outptr)	502 : [outptr] "r" (outptr)

659 );	503 );

660	504

661	505

662 // Then transform columns and add to dest	506 // Then transform columns and add to dest

663 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);	507 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);

664 }	508 }

665	509

666 void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,	510 void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,

667 int dest_stride) {	511 int dest_stride) {

668 uint32_t pos = 45;	512 uint32_t pos = 45;

669 int32_t out;	513 int32_t out;

670 int32_t r;	514 int32_t r;

671 int32_t a1, absa1;	515 int32_t a1, absa1;

672 int32_t t1, t2, vector_a1, vector_1, vector_2;	516 int32_t t1, t2, vector_a1, vector_1, vector_2;

673	517

674 /* bit positon for extract from acc */	518 /* bit positon for extract from acc */

675 __asm__ __volatile__ (	519 __asm__ __volatile__ (

676 "wrdsp %[pos], 1 \n\t"	520 "wrdsp %[pos], 1 \n\t"

(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
736 "add %[dest], %[dest], %[dest_stride] \n\t"	580 "add %[dest], %[dest], %[dest_stride] \n\t"

737	581

738 : [t1] "=&r" (t1), [t2] "=&r" (t2),	582 : [t1] "=&r" (t1), [t2] "=&r" (t2),

739 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2),	583 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2),

740 [dest] "+r" (dest)	584 [dest] "+r" (dest)

741 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)	585 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)

742 );	586 );

743 }	587 }

744 }	588 }

745 }	589 }

746 #endif // #if HAVE_DSPR2	590

	591 void iadst8_dspr2(const int16_t *input, int16_t *output) {

	592 int s0, s1, s2, s3, s4, s5, s6, s7;

	593 int x0, x1, x2, x3, x4, x5, x6, x7;

	594

	595 x0 = input[7];

	596 x1 = input[0];

	597 x2 = input[5];

	598 x3 = input[2];

	599 x4 = input[3];

	600 x5 = input[4];

	601 x6 = input[1];

	602 x7 = input[6];

	603

	604 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {

	605 output[0] = output[1] = output[2] = output[3] = output[4]

	606 = output[5] = output[6] = output[7] = 0;

	607 return;

	608 }

	609

	610 // stage 1

	611 s0 = cospi_2_64 * x0 + cospi_30_64 * x1;

	612 s1 = cospi_30_64 * x0 - cospi_2_64 * x1;

	613 s2 = cospi_10_64 * x2 + cospi_22_64 * x3;

	614 s3 = cospi_22_64 * x2 - cospi_10_64 * x3;

	615 s4 = cospi_18_64 * x4 + cospi_14_64 * x5;

	616 s5 = cospi_14_64 * x4 - cospi_18_64 * x5;

	617 s6 = cospi_26_64 * x6 + cospi_6_64 * x7;

	618 s7 = cospi_6_64 * x6 - cospi_26_64 * x7;

	619

	620 x0 = ROUND_POWER_OF_TWO((s0 + s4), DCT_CONST_BITS);

	621 x1 = ROUND_POWER_OF_TWO((s1 + s5), DCT_CONST_BITS);

	622 x2 = ROUND_POWER_OF_TWO((s2 + s6), DCT_CONST_BITS);

	623 x3 = ROUND_POWER_OF_TWO((s3 + s7), DCT_CONST_BITS);

	624 x4 = ROUND_POWER_OF_TWO((s0 - s4), DCT_CONST_BITS);

	625 x5 = ROUND_POWER_OF_TWO((s1 - s5), DCT_CONST_BITS);

	626 x6 = ROUND_POWER_OF_TWO((s2 - s6), DCT_CONST_BITS);

	627 x7 = ROUND_POWER_OF_TWO((s3 - s7), DCT_CONST_BITS);

	628

	629 // stage 2

	630 s0 = x0;

	631 s1 = x1;

	632 s2 = x2;

	633 s3 = x3;

	634 s4 = cospi_8_64 * x4 + cospi_24_64 * x5;

	635 s5 = cospi_24_64 * x4 - cospi_8_64 * x5;

	636 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;

	637 s7 = cospi_8_64 * x6 + cospi_24_64 * x7;

	638

	639 x0 = s0 + s2;

	640 x1 = s1 + s3;

	641 x2 = s0 - s2;

	642 x3 = s1 - s3;

	643 x4 = ROUND_POWER_OF_TWO((s4 + s6), DCT_CONST_BITS);

	644 x5 = ROUND_POWER_OF_TWO((s5 + s7), DCT_CONST_BITS);

	645 x6 = ROUND_POWER_OF_TWO((s4 - s6), DCT_CONST_BITS);

	646 x7 = ROUND_POWER_OF_TWO((s5 - s7), DCT_CONST_BITS);

	647

	648 // stage 3

	649 s2 = cospi_16_64 * (x2 + x3);

	650 s3 = cospi_16_64 * (x2 - x3);

	651 s6 = cospi_16_64 * (x6 + x7);

	652 s7 = cospi_16_64 * (x6 - x7);

	653

	654 x2 = ROUND_POWER_OF_TWO((s2), DCT_CONST_BITS);

	655 x3 = ROUND_POWER_OF_TWO((s3), DCT_CONST_BITS);

	656 x6 = ROUND_POWER_OF_TWO((s6), DCT_CONST_BITS);

	657 x7 = ROUND_POWER_OF_TWO((s7), DCT_CONST_BITS);

	658

	659 output[0] = x0;

	660 output[1] = -x4;

	661 output[2] = x6;

	662 output[3] = -x2;

	663 output[4] = x3;

	664 output[5] = -x7;

	665 output[6] = x5;

	666 output[7] = -x1;

	667 }

	668 #endif // HAVE_DSPR2

OLD	NEW

« no previous file with comments | « source/libvpx/vpx_dsp/mips/itrans4_dspr2.c ('k') | source/libvpx/vpx_dsp/mips/macros_msa.h » ('j') | no next file with comments »