OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <assert.h> | |
12 #include <stdio.h> | |
13 | |
14 #include "./vpx_config.h" | 11 #include "./vpx_config.h" |
15 #include "./vp9_rtcd.h" | 12 #include "./vpx_dsp_rtcd.h" |
16 #include "vp9/common/vp9_common.h" | 13 #include "vpx_dsp/mips/inv_txfm_dspr2.h" |
17 #include "vp9/common/vp9_blockd.h" | |
18 #include "vp9/common/vp9_idct.h" | |
19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h" | |
20 #include "vpx_dsp/txfm_common.h" | 14 #include "vpx_dsp/txfm_common.h" |
21 #include "vpx_ports/mem.h" | |
22 | 15 |
23 #if HAVE_DSPR2 | 16 #if HAVE_DSPR2 |
24 static void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) { | 17 void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output) { |
25 int16_t step_0, step_1, step_2, step_3; | 18 int16_t step_0, step_1, step_2, step_3; |
26 int Temp0, Temp1, Temp2, Temp3; | 19 int Temp0, Temp1, Temp2, Temp3; |
27 const int const_2_power_13 = 8192; | 20 const int const_2_power_13 = 8192; |
28 int i; | 21 int i; |
29 | 22 |
30 for (i = 4; i--; ) { | 23 for (i = 4; i--; ) { |
31 __asm__ __volatile__ ( | 24 __asm__ __volatile__ ( |
32 /* | 25 /* |
33 temp_1 = (input[0] + input[2]) * cospi_16_64; | 26 temp_1 = (input[0] + input[2]) * cospi_16_64; |
34 step_0 = dct_const_round_shift(temp_1); | 27 step_0 = dct_const_round_shift(temp_1); |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
99 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64), | 92 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64), |
100 [cospi_24_64] "r" (cospi_24_64), | 93 [cospi_24_64] "r" (cospi_24_64), |
101 [input] "r" (input) | 94 [input] "r" (input) |
102 ); | 95 ); |
103 | 96 |
104 input += 4; | 97 input += 4; |
105 output += 1; | 98 output += 1; |
106 } | 99 } |
107 } | 100 } |
108 | 101 |
109 static void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, | 102 void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, |
110 int dest_stride) { | 103 int dest_stride) { |
111 int16_t step_0, step_1, step_2, step_3; | 104 int16_t step_0, step_1, step_2, step_3; |
112 int Temp0, Temp1, Temp2, Temp3; | 105 int Temp0, Temp1, Temp2, Temp3; |
113 const int const_2_power_13 = 8192; | 106 const int const_2_power_13 = 8192; |
114 int i; | 107 int i; |
115 uint8_t *dest_pix; | 108 uint8_t *dest_pix; |
116 uint8_t *cm = vpx_ff_cropTbl; | 109 uint8_t *cm = vpx_ff_cropTbl; |
117 | 110 |
118 /* prefetch vpx_ff_cropTbl */ | 111 /* prefetch vpx_ff_cropTbl */ |
119 prefetch_load(vpx_ff_cropTbl); | 112 prefetch_load(vpx_ff_cropTbl); |
120 prefetch_load(vpx_ff_cropTbl + 32); | 113 prefetch_load(vpx_ff_cropTbl + 32); |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
221 : [const_2_power_13] "r" (const_2_power_13), | 214 : [const_2_power_13] "r" (const_2_power_13), |
222 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64), | 215 [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64), |
223 [cospi_24_64] "r" (cospi_24_64), | 216 [cospi_24_64] "r" (cospi_24_64), |
224 [input] "r" (input), [cm] "r" (cm), [dest_stride] "r" (dest_stride) | 217 [input] "r" (input), [cm] "r" (cm), [dest_stride] "r" (dest_stride) |
225 ); | 218 ); |
226 | 219 |
227 input += 4; | 220 input += 4; |
228 } | 221 } |
229 } | 222 } |
230 | 223 |
231 void vp9_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, | 224 void vpx_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, |
232 int dest_stride) { | 225 int dest_stride) { |
233 DECLARE_ALIGNED(32, int16_t, out[4 * 4]); | 226 DECLARE_ALIGNED(32, int16_t, out[4 * 4]); |
234 int16_t *outptr = out; | 227 int16_t *outptr = out; |
235 uint32_t pos = 45; | 228 uint32_t pos = 45; |
236 | 229 |
237 /* bit position for extract from acc; set once via the wrdsp instruction below */ | 230 /* bit position for extract from acc; set once via the wrdsp instruction below */ |
238 __asm__ __volatile__ ( | 231 __asm__ __volatile__ ( |
239 "wrdsp %[pos], 1 \n\t" | 232 "wrdsp %[pos], 1 \n\t" |
240 : | 233 : |
241 : [pos] "r" (pos) | 234 : [pos] "r" (pos) |
242 ); | 235 ); |
243 | 236 |
244 // Rows: first pass reads input and writes the 4x4 intermediate buffer out | 237 // Rows: first pass reads input and writes the 4x4 intermediate buffer out |
245 vp9_idct4_rows_dspr2(input, outptr); | 238 vpx_idct4_rows_dspr2(input, outptr); |
246 | 239 |
247 // Columns: second pass reads out and is given dest/dest_stride to update | 240 // Columns: second pass reads out and is given dest/dest_stride to update |
248 vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); | 241 vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); |
249 } | 242 } |
250 | 243 |
251 void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, | 244 void vpx_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, |
252 int dest_stride) { | 245 int dest_stride) { |
253 int a1, absa1; | 246 int a1, absa1; |
254 int r; | 247 int r; |
255 int32_t out; | 248 int32_t out; |
256 int t2, vector_a1, vector_a; | 249 int t2, vector_a1, vector_a; |
257 uint32_t pos = 45; | 250 uint32_t pos = 45; |
258 int16_t input_dc = input[0]; | 251 int16_t input_dc = input[0]; |
259 | 252 |
260 /* bit position for extract from acc */ | 253 /* bit position for extract from acc */ |
261 __asm__ __volatile__ ( | 254 __asm__ __volatile__ ( |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
314 "add %[dest], %[dest], %[dest_stride] \n\t" | 307 "add %[dest], %[dest], %[dest_stride] \n\t" |
315 | 308 |
316 : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), | 309 : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), |
317 [dest] "+&r" (dest) | 310 [dest] "+&r" (dest) |
318 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) | 311 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) |
319 ); | 312 ); |
320 } | 313 } |
321 } | 314 } |
322 } | 315 } |
323 | 316 |
324 static void iadst4_dspr2(const int16_t *input, int16_t *output) { | 317 void iadst4_dspr2(const int16_t *input, int16_t *output) { |
325 int s0, s1, s2, s3, s4, s5, s6, s7; | 318 int s0, s1, s2, s3, s4, s5, s6, s7; |
326 int x0, x1, x2, x3; | 319 int x0, x1, x2, x3; |
327 | 320 |
328 x0 = input[0]; | 321 x0 = input[0]; |
329 x1 = input[1]; | 322 x1 = input[1]; |
330 x2 = input[2]; | 323 x2 = input[2]; |
331 x3 = input[3]; | 324 x3 = input[3]; |
332 | 325 |
333 if (!(x0 | x1 | x2 | x3)) { | 326 if (!(x0 | x1 | x2 | x3)) { |
334 output[0] = output[1] = output[2] = output[3] = 0; | 327 output[0] = output[1] = output[2] = output[3] = 0; |
(...skipping 21 matching lines...) Expand all Loading... |
356 | 349 |
357 // 1-D transform scaling factor is sqrt(2). | 350 // 1-D transform scaling factor is sqrt(2). |
358 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 351 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
359 // + 1b (addition) = 29b. | 352 // + 1b (addition) = 29b. |
360 // Hence the output bit depth is 15b. | 353 // Hence the output bit depth is 15b. |
361 output[0] = dct_const_round_shift(s0); | 354 output[0] = dct_const_round_shift(s0); |
362 output[1] = dct_const_round_shift(s1); | 355 output[1] = dct_const_round_shift(s1); |
363 output[2] = dct_const_round_shift(s2); | 356 output[2] = dct_const_round_shift(s2); |
364 output[3] = dct_const_round_shift(s3); | 357 output[3] = dct_const_round_shift(s3); |
365 } | 358 } |
366 | |
367 void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, | |
368 int dest_stride, int tx_type) { | |
369 int i, j; | |
370 DECLARE_ALIGNED(32, int16_t, out[4 * 4]); | |
371 int16_t *outptr = out; | |
372 int16_t temp_in[4 * 4], temp_out[4]; | |
373 uint32_t pos = 45; | |
374 | |
375 /* bit position for extract from acc; set once via the wrdsp instruction below */ | |
376 __asm__ __volatile__ ( | |
377 "wrdsp %[pos], 1 \n\t" | |
378 : | |
379 : [pos] "r" (pos) | |
380 ); | |
381 | |
382 switch (tx_type) { | |
383 case DCT_DCT: // DCT in both horizontal and vertical: row pass into out, then the column add helper | |
384 vp9_idct4_rows_dspr2(input, outptr); | |
385 vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); | |
386 break; | |
387 case ADST_DCT: // ADST in vertical, DCT in horizontal: row DCT into out, then iadst4 on each 4-element group of out, rounded (>> 4), added to dest and clipped | |
388 vp9_idct4_rows_dspr2(input, outptr); | |
389 | |
390 outptr = out; | |
391 | |
392 for (i = 0; i < 4; ++i) { | |
393 iadst4_dspr2(outptr, temp_out); | |
394 | |
395 for (j = 0; j < 4; ++j) | |
396 dest[j * dest_stride + i] = | |
397 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | |
398 + dest[j * dest_stride + i]); | |
399 | |
400 outptr += 4; | |
401 } | |
402 break; | |
403 case DCT_ADST: // DCT in vertical, ADST in horizontal: iadst4 on each input row, transpose out into temp_in, then the column add helper | |
404 for (i = 0; i < 4; ++i) { | |
405 iadst4_dspr2(input, outptr); | |
406 input += 4; | |
407 outptr += 4; | |
408 } | |
409 | |
410 for (i = 0; i < 4; ++i) { | |
411 for (j = 0; j < 4; ++j) { | |
412 temp_in[i * 4 + j] = out[j * 4 + i]; | |
413 } | |
414 } | |
415 vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); | |
416 break; | |
417 case ADST_ADST: // ADST in both directions: iadst4 on each input row, then iadst4 on each column of out, rounded (>> 4), added to dest and clipped | |
418 for (i = 0; i < 4; ++i) { | |
419 iadst4_dspr2(input, outptr); | |
420 input += 4; | |
421 outptr += 4; | |
422 } | |
423 | |
424 for (i = 0; i < 4; ++i) { | |
425 for (j = 0; j < 4; ++j) | |
426 temp_in[j] = out[j * 4 + i]; | |
427 iadst4_dspr2(temp_in, temp_out); | |
428 | |
429 for (j = 0; j < 4; ++j) | |
430 dest[j * dest_stride + i] = | |
431 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | |
432 + dest[j * dest_stride + i]); | |
433 } | |
434 break; | |
435 default: /* NOTE(review): only logs and leaves dest unmodified; the message names vp9_short_iht4x4_add_dspr2 rather than this function */ | |
436 printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); | |
437 break; | |
438 } | |
439 } | |
440 #endif // #if HAVE_DSPR2 | 359 #endif // #if HAVE_DSPR2 |
OLD | NEW |