| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <assert.h> | 11 #include <assert.h> |
| 12 #include <stdio.h> | 12 #include <stdio.h> |
| 13 | 13 |
| 14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
| 15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
| 16 #include "vp9/common/vp9_common.h" | 16 #include "vp9/common/vp9_common.h" |
| 17 #include "vp9/common/vp9_blockd.h" | 17 #include "vp9/common/vp9_blockd.h" |
| 18 #include "vp9/common/vp9_idct.h" | 18 #include "vp9/common/vp9_idct.h" |
| 19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h" | 19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h" |
| 20 | 20 |
| 21 #if HAVE_DSPR2 | 21 #if HAVE_DSPR2 |
| 22 static void vp9_idct4_1d_rows_dspr2(const int16_t *input, int16_t *output) { | 22 static void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) { |
| 23 int16_t step_0, step_1, step_2, step_3; | 23 int16_t step_0, step_1, step_2, step_3; |
| 24 int Temp0, Temp1, Temp2, Temp3; | 24 int Temp0, Temp1, Temp2, Temp3; |
| 25 const int const_2_power_13 = 8192; | 25 const int const_2_power_13 = 8192; |
| 26 int i; | 26 int i; |
| 27 | 27 |
| 28 for (i = 4; i--; ) { | 28 for (i = 4; i--; ) { |
| 29 __asm__ __volatile__ ( | 29 __asm__ __volatile__ ( |
| 30 /* | 30 /* |
| 31 temp_1 = (input[0] + input[2]) * cospi_16_64; | 31 temp_1 = (input[0] + input[2]) * cospi_16_64; |
| 32 step_0 = dct_const_round_shift(temp_1); | 32 step_0 = dct_const_round_shift(temp_1); |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 97 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64), | 97 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64), |
| 98 [cospi_24_64] "r" (cospi_24_64), | 98 [cospi_24_64] "r" (cospi_24_64), |
| 99 [input] "r" (input) | 99 [input] "r" (input) |
| 100 ); | 100 ); |
| 101 | 101 |
| 102 input += 4; | 102 input += 4; |
| 103 output += 1; | 103 output += 1; |
| 104 } | 104 } |
| 105 } | 105 } |
| 106 | 106 |
| 107 static void vp9_idct4_1d_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, | 107 static void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, |
| 108 int dest_stride) { | 108 int dest_stride) { |
| 109 int16_t step_0, step_1, step_2, step_3; | 109 int16_t step_0, step_1, step_2, step_3; |
| 110 int Temp0, Temp1, Temp2, Temp3; | 110 int Temp0, Temp1, Temp2, Temp3; |
| 111 const int const_2_power_13 = 8192; | 111 const int const_2_power_13 = 8192; |
| 112 int i; | 112 int i; |
| 113 uint8_t *dest_pix; | 113 uint8_t *dest_pix; |
| 114 uint8_t *cm = vp9_ff_cropTbl; | 114 uint8_t *cm = vp9_ff_cropTbl; |
| 115 | 115 |
| 116 /* prefetch vp9_ff_cropTbl */ | 116 /* prefetch vp9_ff_cropTbl */ |
| 117 vp9_prefetch_load(vp9_ff_cropTbl); | 117 vp9_prefetch_load(vp9_ff_cropTbl); |
| (...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 233 uint32_t pos = 45; | 233 uint32_t pos = 45; |
| 234 | 234 |
| 235 /* bit position for extract from acc */ | 235 /* bit position for extract from acc */ |
| 236 __asm__ __volatile__ ( | 236 __asm__ __volatile__ ( |
| 237 "wrdsp %[pos], 1 \n\t" | 237 "wrdsp %[pos], 1 \n\t" |
| 238 : | 238 : |
| 239 : [pos] "r" (pos) | 239 : [pos] "r" (pos) |
| 240 ); | 240 ); |
| 241 | 241 |
| 242 // Rows | 242 // Rows |
| 243 vp9_idct4_1d_rows_dspr2(input, outptr); | 243 vp9_idct4_rows_dspr2(input, outptr); |
| 244 | 244 |
| 245 // Columns | 245 // Columns |
| 246 vp9_idct4_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride); | 246 vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); |
| 247 } | 247 } |
| 248 | 248 |
| 249 void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, | 249 void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, |
| 250 int dest_stride) { | 250 int dest_stride) { |
| 251 int a1, absa1; | 251 int a1, absa1; |
| 252 int r; | 252 int r; |
| 253 int32_t out; | 253 int32_t out; |
| 254 int t2, vector_a1, vector_a; | 254 int t2, vector_a1, vector_a; |
| 255 uint32_t pos = 45; | 255 uint32_t pos = 45; |
| 256 int16_t input_dc = input[0]; | 256 int16_t input_dc = input[0]; |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 312 "add %[dest], %[dest], %[dest_stride] \n\t" | 312 "add %[dest], %[dest], %[dest_stride] \n\t" |
| 313 | 313 |
| 314 : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), | 314 : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), |
| 315 [dest] "+&r" (dest) | 315 [dest] "+&r" (dest) |
| 316 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) | 316 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) |
| 317 ); | 317 ); |
| 318 } | 318 } |
| 319 } | 319 } |
| 320 } | 320 } |
| 321 | 321 |
| 322 static void iadst4_1d_dspr2(const int16_t *input, int16_t *output) { | 322 static void iadst4_dspr2(const int16_t *input, int16_t *output) { |
| 323 int s0, s1, s2, s3, s4, s5, s6, s7; | 323 int s0, s1, s2, s3, s4, s5, s6, s7; |
| 324 int x0, x1, x2, x3; | 324 int x0, x1, x2, x3; |
| 325 | 325 |
| 326 x0 = input[0]; | 326 x0 = input[0]; |
| 327 x1 = input[1]; | 327 x1 = input[1]; |
| 328 x2 = input[2]; | 328 x2 = input[2]; |
| 329 x3 = input[3]; | 329 x3 = input[3]; |
| 330 | 330 |
| 331 if (!(x0 | x1 | x2 | x3)) { | 331 if (!(x0 | x1 | x2 | x3)) { |
| 332 output[0] = output[1] = output[2] = output[3] = 0; | 332 output[0] = output[1] = output[2] = output[3] = 0; |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 372 | 372 |
| 373 /* bit position for extract from acc */ | 373 /* bit position for extract from acc */ |
| 374 __asm__ __volatile__ ( | 374 __asm__ __volatile__ ( |
| 375 "wrdsp %[pos], 1 \n\t" | 375 "wrdsp %[pos], 1 \n\t" |
| 376 : | 376 : |
| 377 : [pos] "r" (pos) | 377 : [pos] "r" (pos) |
| 378 ); | 378 ); |
| 379 | 379 |
| 380 switch (tx_type) { | 380 switch (tx_type) { |
| 381 case DCT_DCT: // DCT in both horizontal and vertical | 381 case DCT_DCT: // DCT in both horizontal and vertical |
| 382 vp9_idct4_1d_rows_dspr2(input, outptr); | 382 vp9_idct4_rows_dspr2(input, outptr); |
| 383 vp9_idct4_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride); | 383 vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); |
| 384 break; | 384 break; |
| 385 case ADST_DCT: // ADST in vertical, DCT in horizontal | 385 case ADST_DCT: // ADST in vertical, DCT in horizontal |
| 386 vp9_idct4_1d_rows_dspr2(input, outptr); | 386 vp9_idct4_rows_dspr2(input, outptr); |
| 387 | 387 |
| 388 outptr = out; | 388 outptr = out; |
| 389 | 389 |
| 390 for (i = 0; i < 4; ++i) { | 390 for (i = 0; i < 4; ++i) { |
| 391 iadst4_1d_dspr2(outptr, temp_out); | 391 iadst4_dspr2(outptr, temp_out); |
| 392 | 392 |
| 393 for (j = 0; j < 4; ++j) | 393 for (j = 0; j < 4; ++j) |
| 394 dest[j * dest_stride + i] = | 394 dest[j * dest_stride + i] = |
| 395 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | 395 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) |
| 396 + dest[j * dest_stride + i]); | 396 + dest[j * dest_stride + i]); |
| 397 | 397 |
| 398 outptr += 4; | 398 outptr += 4; |
| 399 } | 399 } |
| 400 break; | 400 break; |
| 401 case DCT_ADST: // DCT in vertical, ADST in horizontal | 401 case DCT_ADST: // DCT in vertical, ADST in horizontal |
| 402 for (i = 0; i < 4; ++i) { | 402 for (i = 0; i < 4; ++i) { |
| 403 iadst4_1d_dspr2(input, outptr); | 403 iadst4_dspr2(input, outptr); |
| 404 input += 4; | 404 input += 4; |
| 405 outptr += 4; | 405 outptr += 4; |
| 406 } | 406 } |
| 407 | 407 |
| 408 for (i = 0; i < 4; ++i) { | 408 for (i = 0; i < 4; ++i) { |
| 409 for (j = 0; j < 4; ++j) { | 409 for (j = 0; j < 4; ++j) { |
| 410 temp_in[i * 4 + j] = out[j * 4 + i]; | 410 temp_in[i * 4 + j] = out[j * 4 + i]; |
| 411 } | 411 } |
| 412 } | 412 } |
| 413 vp9_idct4_1d_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); | 413 vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); |
| 414 break; | 414 break; |
| 415 case ADST_ADST: // ADST in both directions | 415 case ADST_ADST: // ADST in both directions |
| 416 for (i = 0; i < 4; ++i) { | 416 for (i = 0; i < 4; ++i) { |
| 417 iadst4_1d_dspr2(input, outptr); | 417 iadst4_dspr2(input, outptr); |
| 418 input += 4; | 418 input += 4; |
| 419 outptr += 4; | 419 outptr += 4; |
| 420 } | 420 } |
| 421 | 421 |
| 422 for (i = 0; i < 4; ++i) { | 422 for (i = 0; i < 4; ++i) { |
| 423 for (j = 0; j < 4; ++j) | 423 for (j = 0; j < 4; ++j) |
| 424 temp_in[j] = out[j * 4 + i]; | 424 temp_in[j] = out[j * 4 + i]; |
| 425 iadst4_1d_dspr2(temp_in, temp_out); | 425 iadst4_dspr2(temp_in, temp_out); |
| 426 | 426 |
| 427 for (j = 0; j < 4; ++j) | 427 for (j = 0; j < 4; ++j) |
| 428 dest[j * dest_stride + i] = | 428 dest[j * dest_stride + i] = |
| 429 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | 429 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) |
| 430 + dest[j * dest_stride + i]); | 430 + dest[j * dest_stride + i]); |
| 431 } | 431 } |
| 432 break; | 432 break; |
| 433 default: | 433 default: |
| 434 printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); | 434 printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); |
| 435 break; | 435 break; |
| 436 } | 436 } |
| 437 } | 437 } |
| 438 #endif // #if HAVE_DSPR2 | 438 #endif // #if HAVE_DSPR2 |
| OLD | NEW |