OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <assert.h> | 11 #include <assert.h> |
12 #include <stdio.h> | 12 #include <stdio.h> |
13 | 13 |
14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
16 #include "vp9/common/vp9_common.h" | 16 #include "vp9/common/vp9_common.h" |
17 #include "vp9/common/vp9_blockd.h" | 17 #include "vp9/common/vp9_blockd.h" |
18 #include "vp9/common/vp9_idct.h" | 18 #include "vp9/common/vp9_idct.h" |
19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h" | 19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h" |
20 | 20 |
21 #if HAVE_DSPR2 | 21 #if HAVE_DSPR2 |
22 static void vp9_idct4_1d_rows_dspr2(const int16_t *input, int16_t *output) { | 22 static void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) { |
23 int16_t step_0, step_1, step_2, step_3; | 23 int16_t step_0, step_1, step_2, step_3; |
24 int Temp0, Temp1, Temp2, Temp3; | 24 int Temp0, Temp1, Temp2, Temp3; |
25 const int const_2_power_13 = 8192; | 25 const int const_2_power_13 = 8192; |
26 int i; | 26 int i; |
27 | 27 |
28 for (i = 4; i--; ) { | 28 for (i = 4; i--; ) { |
29 __asm__ __volatile__ ( | 29 __asm__ __volatile__ ( |
30 /* | 30 /* |
31 temp_1 = (input[0] + input[2]) * cospi_16_64; | 31 temp_1 = (input[0] + input[2]) * cospi_16_64; |
32 step_0 = dct_const_round_shift(temp_1); | 32 step_0 = dct_const_round_shift(temp_1); |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
97 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64), | 97 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64), |
98 [cospi_24_64] "r" (cospi_24_64), | 98 [cospi_24_64] "r" (cospi_24_64), |
99 [input] "r" (input) | 99 [input] "r" (input) |
100 ); | 100 ); |
101 | 101 |
102 input += 4; | 102 input += 4; |
103 output += 1; | 103 output += 1; |
104 } | 104 } |
105 } | 105 } |
106 | 106 |
107 static void vp9_idct4_1d_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, | 107 static void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, |
108 int dest_stride) { | 108 int dest_stride) { |
109 int16_t step_0, step_1, step_2, step_3; | 109 int16_t step_0, step_1, step_2, step_3; |
110 int Temp0, Temp1, Temp2, Temp3; | 110 int Temp0, Temp1, Temp2, Temp3; |
111 const int const_2_power_13 = 8192; | 111 const int const_2_power_13 = 8192; |
112 int i; | 112 int i; |
113 uint8_t *dest_pix; | 113 uint8_t *dest_pix; |
114 uint8_t *cm = vp9_ff_cropTbl; | 114 uint8_t *cm = vp9_ff_cropTbl; |
115 | 115 |
116 /* prefetch vp9_ff_cropTbl */ | 116 /* prefetch vp9_ff_cropTbl */ |
117 vp9_prefetch_load(vp9_ff_cropTbl); | 117 vp9_prefetch_load(vp9_ff_cropTbl); |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
233 uint32_t pos = 45; | 233 uint32_t pos = 45; |
234 | 234 |
235 /* bit position for extract from acc */ | 235 /* bit position for extract from acc */ |
236 __asm__ __volatile__ ( | 236 __asm__ __volatile__ ( |
237 "wrdsp %[pos], 1 \n\t" | 237 "wrdsp %[pos], 1 \n\t" |
238 : | 238 : |
239 : [pos] "r" (pos) | 239 : [pos] "r" (pos) |
240 ); | 240 ); |
241 | 241 |
242 // Rows | 242 // Rows |
243 vp9_idct4_1d_rows_dspr2(input, outptr); | 243 vp9_idct4_rows_dspr2(input, outptr); |
244 | 244 |
245 // Columns | 245 // Columns |
246 vp9_idct4_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride); | 246 vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); |
247 } | 247 } |
248 | 248 |
249 void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, | 249 void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, |
250 int dest_stride) { | 250 int dest_stride) { |
251 int a1, absa1; | 251 int a1, absa1; |
252 int r; | 252 int r; |
253 int32_t out; | 253 int32_t out; |
254 int t2, vector_a1, vector_a; | 254 int t2, vector_a1, vector_a; |
255 uint32_t pos = 45; | 255 uint32_t pos = 45; |
256 int16_t input_dc = input[0]; | 256 int16_t input_dc = input[0]; |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
312 "add %[dest], %[dest], %[dest_stride] \n\t" | 312 "add %[dest], %[dest], %[dest_stride] \n\t" |
313 | 313 |
314 : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), | 314 : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), |
315 [dest] "+&r" (dest) | 315 [dest] "+&r" (dest) |
316 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) | 316 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) |
317 ); | 317 ); |
318 } | 318 } |
319 } | 319 } |
320 } | 320 } |
321 | 321 |
322 static void iadst4_1d_dspr2(const int16_t *input, int16_t *output) { | 322 static void iadst4_dspr2(const int16_t *input, int16_t *output) { |
323 int s0, s1, s2, s3, s4, s5, s6, s7; | 323 int s0, s1, s2, s3, s4, s5, s6, s7; |
324 int x0, x1, x2, x3; | 324 int x0, x1, x2, x3; |
325 | 325 |
326 x0 = input[0]; | 326 x0 = input[0]; |
327 x1 = input[1]; | 327 x1 = input[1]; |
328 x2 = input[2]; | 328 x2 = input[2]; |
329 x3 = input[3]; | 329 x3 = input[3]; |
330 | 330 |
331 if (!(x0 | x1 | x2 | x3)) { | 331 if (!(x0 | x1 | x2 | x3)) { |
332 output[0] = output[1] = output[2] = output[3] = 0; | 332 output[0] = output[1] = output[2] = output[3] = 0; |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
372 | 372 |
373 /* bit position for extract from acc */ | 373 /* bit position for extract from acc */ |
374 __asm__ __volatile__ ( | 374 __asm__ __volatile__ ( |
375 "wrdsp %[pos], 1 \n\t" | 375 "wrdsp %[pos], 1 \n\t" |
376 : | 376 : |
377 : [pos] "r" (pos) | 377 : [pos] "r" (pos) |
378 ); | 378 ); |
379 | 379 |
380 switch (tx_type) { | 380 switch (tx_type) { |
381 case DCT_DCT: // DCT in both horizontal and vertical | 381 case DCT_DCT: // DCT in both horizontal and vertical |
382 vp9_idct4_1d_rows_dspr2(input, outptr); | 382 vp9_idct4_rows_dspr2(input, outptr); |
383 vp9_idct4_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride); | 383 vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); |
384 break; | 384 break; |
385 case ADST_DCT: // ADST in vertical, DCT in horizontal | 385 case ADST_DCT: // ADST in vertical, DCT in horizontal |
386 vp9_idct4_1d_rows_dspr2(input, outptr); | 386 vp9_idct4_rows_dspr2(input, outptr); |
387 | 387 |
388 outptr = out; | 388 outptr = out; |
389 | 389 |
390 for (i = 0; i < 4; ++i) { | 390 for (i = 0; i < 4; ++i) { |
391 iadst4_1d_dspr2(outptr, temp_out); | 391 iadst4_dspr2(outptr, temp_out); |
392 | 392 |
393 for (j = 0; j < 4; ++j) | 393 for (j = 0; j < 4; ++j) |
394 dest[j * dest_stride + i] = | 394 dest[j * dest_stride + i] = |
395 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | 395 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) |
396 + dest[j * dest_stride + i]); | 396 + dest[j * dest_stride + i]); |
397 | 397 |
398 outptr += 4; | 398 outptr += 4; |
399 } | 399 } |
400 break; | 400 break; |
401 case DCT_ADST: // DCT in vertical, ADST in horizontal | 401 case DCT_ADST: // DCT in vertical, ADST in horizontal |
402 for (i = 0; i < 4; ++i) { | 402 for (i = 0; i < 4; ++i) { |
403 iadst4_1d_dspr2(input, outptr); | 403 iadst4_dspr2(input, outptr); |
404 input += 4; | 404 input += 4; |
405 outptr += 4; | 405 outptr += 4; |
406 } | 406 } |
407 | 407 |
408 for (i = 0; i < 4; ++i) { | 408 for (i = 0; i < 4; ++i) { |
409 for (j = 0; j < 4; ++j) { | 409 for (j = 0; j < 4; ++j) { |
410 temp_in[i * 4 + j] = out[j * 4 + i]; | 410 temp_in[i * 4 + j] = out[j * 4 + i]; |
411 } | 411 } |
412 } | 412 } |
413 vp9_idct4_1d_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); | 413 vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); |
414 break; | 414 break; |
415 case ADST_ADST: // ADST in both directions | 415 case ADST_ADST: // ADST in both directions |
416 for (i = 0; i < 4; ++i) { | 416 for (i = 0; i < 4; ++i) { |
417 iadst4_1d_dspr2(input, outptr); | 417 iadst4_dspr2(input, outptr); |
418 input += 4; | 418 input += 4; |
419 outptr += 4; | 419 outptr += 4; |
420 } | 420 } |
421 | 421 |
422 for (i = 0; i < 4; ++i) { | 422 for (i = 0; i < 4; ++i) { |
423 for (j = 0; j < 4; ++j) | 423 for (j = 0; j < 4; ++j) |
424 temp_in[j] = out[j * 4 + i]; | 424 temp_in[j] = out[j * 4 + i]; |
425 iadst4_1d_dspr2(temp_in, temp_out); | 425 iadst4_dspr2(temp_in, temp_out); |
426 | 426 |
427 for (j = 0; j < 4; ++j) | 427 for (j = 0; j < 4; ++j) |
428 dest[j * dest_stride + i] = | 428 dest[j * dest_stride + i] = |
429 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | 429 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) |
430 + dest[j * dest_stride + i]); | 430 + dest[j * dest_stride + i]); |
431 } | 431 } |
432 break; | 432 break; |
433 default: | 433 default: |
434 printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); | 434 printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); |
435 break; | 435 break; |
436 } | 436 } |
437 } | 437 } |
438 #endif // #if HAVE_DSPR2 | 438 #endif // #if HAVE_DSPR2 |
OLD | NEW |