source/libvpx/vpx_dsp/mips/itrans4_dspr2.c - Issue 1302353004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vpx_dsp/mips/itrans4_dspr2.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include <assert.h>

12 #include <stdio.h>

13

14 #include "./vpx_config.h"	11 #include "./vpx_config.h"

15 #include "./vp9_rtcd.h"	12 #include "./vpx_dsp_rtcd.h"

16 #include "vp9/common/vp9_common.h"	13 #include "vpx_dsp/mips/inv_txfm_dspr2.h"

17 #include "vp9/common/vp9_blockd.h"

18 #include "vp9/common/vp9_idct.h"

19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h"

20 #include "vpx_dsp/txfm_common.h"	14 #include "vpx_dsp/txfm_common.h"

21 #include "vpx_ports/mem.h"

22	15

23 #if HAVE_DSPR2	16 #if HAVE_DSPR2

24 static void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) {	17 void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output) {

25 int16_t step_0, step_1, step_2, step_3;	18 int16_t step_0, step_1, step_2, step_3;

26 int Temp0, Temp1, Temp2, Temp3;	19 int Temp0, Temp1, Temp2, Temp3;

27 const int const_2_power_13 = 8192;	20 const int const_2_power_13 = 8192;

28 int i;	21 int i;

29	22

30 for (i = 4; i--; ) {	23 for (i = 4; i--; ) {

31 __asm__ __volatile__ (	24 __asm__ __volatile__ (

32 /*	25 /*

33 temp_1 = (input[0] + input[2]) * cospi_16_64;	26 temp_1 = (input[0] + input[2]) * cospi_16_64;

34 step_0 = dct_const_round_shift(temp_1);	27 step_0 = dct_const_round_shift(temp_1);

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
99 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64),	92 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64),

100 [cospi_24_64] "r" (cospi_24_64),	93 [cospi_24_64] "r" (cospi_24_64),

101 [input] "r" (input)	94 [input] "r" (input)

102 );	95 );

103	96

104 input += 4;	97 input += 4;

105 output += 1;	98 output += 1;

106 }	99 }

107 }	100 }

108	101

109 static void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,	102 void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,

110 int dest_stride) {	103 int dest_stride) {

111 int16_t step_0, step_1, step_2, step_3;	104 int16_t step_0, step_1, step_2, step_3;

112 int Temp0, Temp1, Temp2, Temp3;	105 int Temp0, Temp1, Temp2, Temp3;

113 const int const_2_power_13 = 8192;	106 const int const_2_power_13 = 8192;

114 int i;	107 int i;

115 uint8_t *dest_pix;	108 uint8_t *dest_pix;

116 uint8_t *cm = vpx_ff_cropTbl;	109 uint8_t *cm = vpx_ff_cropTbl;

117	110

118 /* prefetch vpx_ff_cropTbl */	111 /* prefetch vpx_ff_cropTbl */

119 prefetch_load(vpx_ff_cropTbl);	112 prefetch_load(vpx_ff_cropTbl);

120 prefetch_load(vpx_ff_cropTbl + 32);	113 prefetch_load(vpx_ff_cropTbl + 32);

(...skipping 100 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
221 : [const_2_power_13] "r" (const_2_power_13),	214 : [const_2_power_13] "r" (const_2_power_13),

222 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64),	215 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64),

223 [cospi_24_64] "r" (cospi_24_64),	216 [cospi_24_64] "r" (cospi_24_64),

224 [input] "r" (input), [cm] "r" (cm), [dest_stride] "r" (dest_stride)	217 [input] "r" (input), [cm] "r" (cm), [dest_stride] "r" (dest_stride)

225 );	218 );

226	219

227 input += 4;	220 input += 4;

228 }	221 }

229 }	222 }

230	223

231 void vp9_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,	224 void vpx_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,

232 int dest_stride) {	225 int dest_stride) {

233 DECLARE_ALIGNED(32, int16_t, out[4 * 4]);	226 DECLARE_ALIGNED(32, int16_t, out[4 * 4]);

234 int16_t *outptr = out;	227 int16_t *outptr = out;

235 uint32_t pos = 45;	228 uint32_t pos = 45;

236	229

237 /* bit position for extract from acc */	230 /* bit position for extract from acc */

238 __asm__ __volatile__ (	231 __asm__ __volatile__ (

239 "wrdsp %[pos], 1 \n\t"	232 "wrdsp %[pos], 1 \n\t"

240 :	233 :

241 : [pos] "r" (pos)	234 : [pos] "r" (pos)

242 );	235 );

243	236

244 // Rows	237 // Rows

245 vp9_idct4_rows_dspr2(input, outptr);	238 vpx_idct4_rows_dspr2(input, outptr);

246	239

247 // Columns	240 // Columns

248 vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);	241 vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);

249 }	242 }

250	243

251 void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,	244 void vpx_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,

252 int dest_stride) {	245 int dest_stride) {

253 int a1, absa1;	246 int a1, absa1;

254 int r;	247 int r;

255 int32_t out;	248 int32_t out;

256 int t2, vector_a1, vector_a;	249 int t2, vector_a1, vector_a;

257 uint32_t pos = 45;	250 uint32_t pos = 45;

258 int16_t input_dc = input[0];	251 int16_t input_dc = input[0];

259	252

260 /* bit position for extract from acc */	253 /* bit position for extract from acc */

261 __asm__ __volatile__ (	254 __asm__ __volatile__ (

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
314 "add %[dest], %[dest], %[dest_stride] \n\t"	307 "add %[dest], %[dest], %[dest_stride] \n\t"

315	308

316 : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),	309 : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),

317 [dest] "+&r" (dest)	310 [dest] "+&r" (dest)

318 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)	311 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)

319 );	312 );

320 }	313 }

321 }	314 }

322 }	315 }

323	316

324 static void iadst4_dspr2(const int16_t *input, int16_t *output) {	317 void iadst4_dspr2(const int16_t *input, int16_t *output) {

325 int s0, s1, s2, s3, s4, s5, s6, s7;	318 int s0, s1, s2, s3, s4, s5, s6, s7;

326 int x0, x1, x2, x3;	319 int x0, x1, x2, x3;

327	320

328 x0 = input[0];	321 x0 = input[0];

329 x1 = input[1];	322 x1 = input[1];

330 x2 = input[2];	323 x2 = input[2];

331 x3 = input[3];	324 x3 = input[3];

332	325

333 if (!(x0 | x1 | x2 | x3)) {	326 if (!(x0 | x1 | x2 | x3)) {

334 output[0] = output[1] = output[2] = output[3] = 0;	327 output[0] = output[1] = output[2] = output[3] = 0;

(...skipping 21 matching lines...) Expand all Loading...
356	349

357 // 1-D transform scaling factor is sqrt(2).	350 // 1-D transform scaling factor is sqrt(2).

358 // The overall dynamic range is 14b (input) + 14b (multiplication scaling)	351 // The overall dynamic range is 14b (input) + 14b (multiplication scaling)

359 // + 1b (addition) = 29b.	352 // + 1b (addition) = 29b.

360 // Hence the output bit depth is 15b.	353 // Hence the output bit depth is 15b.

361 output[0] = dct_const_round_shift(s0);	354 output[0] = dct_const_round_shift(s0);

362 output[1] = dct_const_round_shift(s1);	355 output[1] = dct_const_round_shift(s1);

363 output[2] = dct_const_round_shift(s2);	356 output[2] = dct_const_round_shift(s2);

364 output[3] = dct_const_round_shift(s3);	357 output[3] = dct_const_round_shift(s3);

365 }	358 }

366

367 void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,

368 int dest_stride, int tx_type) {

369 int i, j;

370 DECLARE_ALIGNED(32, int16_t, out[4 * 4]);

371 int16_t *outptr = out;

372 int16_t temp_in[4 * 4], temp_out[4];

373 uint32_t pos = 45;

374

375 /* bit position for extract from acc */

376 __asm__ __volatile__ (

377 "wrdsp %[pos], 1 \n\t"

378 :

379 : [pos] "r" (pos)

380 );

381

382 switch (tx_type) {

383 case DCT_DCT: // DCT in both horizontal and vertical

384 vp9_idct4_rows_dspr2(input, outptr);

385 vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);

386 break;

387 case ADST_DCT: // ADST in vertical, DCT in horizontal

388 vp9_idct4_rows_dspr2(input, outptr);

389

390 outptr = out;

391

392 for (i = 0; i < 4; ++i) {

393 iadst4_dspr2(outptr, temp_out);

394

395 for (j = 0; j < 4; ++j)

396 dest[j * dest_stride + i] =

397 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)

398 + dest[j * dest_stride + i]);

399

400 outptr += 4;

401 }

402 break;

403 case DCT_ADST: // DCT in vertical, ADST in horizontal

404 for (i = 0; i < 4; ++i) {

405 iadst4_dspr2(input, outptr);

406 input += 4;

407 outptr += 4;

408 }

409

410 for (i = 0; i < 4; ++i) {

411 for (j = 0; j < 4; ++j) {

412 temp_in[i * 4 + j] = out[j * 4 + i];

413 }

414 }

415 vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);

416 break;

417 case ADST_ADST: // ADST in both directions

418 for (i = 0; i < 4; ++i) {

419 iadst4_dspr2(input, outptr);

420 input += 4;

421 outptr += 4;

422 }

423

424 for (i = 0; i < 4; ++i) {

425 for (j = 0; j < 4; ++j)

426 temp_in[j] = out[j * 4 + i];

427 iadst4_dspr2(temp_in, temp_out);

428

429 for (j = 0; j < 4; ++j)

430 dest[j * dest_stride + i] =

431 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)

432 + dest[j * dest_stride + i]);

433 }

434 break;

435 default:

436 printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n");

437 break;

438 }

439 }

440 #endif // #if HAVE_DSPR2	359 #endif // #if HAVE_DSPR2

OLD	NEW

« no previous file with comments | « source/libvpx/vpx_dsp/mips/itrans32_dspr2.c ('k') | source/libvpx/vpx_dsp/mips/itrans8_dspr2.c » ('j') | no next file with comments »