OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <assert.h> | 11 #include <assert.h> |
12 #include <stdio.h> | 12 #include <stdio.h> |
13 | 13 |
14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
16 #include "vp9/common/vp9_common.h" | 16 #include "vp9/common/vp9_common.h" |
17 #include "vp9/common/vp9_blockd.h" | 17 #include "vp9/common/vp9_blockd.h" |
18 #include "vp9/common/vp9_idct.h" | 18 #include "vp9/common/vp9_idct.h" |
19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h" | 19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h" |
20 | 20 |
21 #if HAVE_DSPR2 | 21 #if HAVE_DSPR2 |
22 static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output) { | 22 static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output, |
| 23 uint32_t no_rows) { |
23 int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6; | 24 int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6; |
24 int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13; | 25 int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13; |
25 int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20; | 26 int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20; |
26 int16_t step1_21, step1_22, step1_23, step1_24, step1_25, step1_26, step1_27; | 27 int16_t step1_21, step1_22, step1_23, step1_24, step1_25, step1_26, step1_27; |
27 int16_t step1_28, step1_29, step1_30, step1_31; | 28 int16_t step1_28, step1_29, step1_30, step1_31; |
28 int16_t step2_0, step2_1, step2_2, step2_3, step2_4, step2_5, step2_6; | 29 int16_t step2_0, step2_1, step2_2, step2_3, step2_4, step2_5, step2_6; |
29 int16_t step2_7, step2_8, step2_9, step2_10, step2_11, step2_12, step2_13; | 30 int16_t step2_7, step2_8, step2_9, step2_10, step2_11, step2_12, step2_13; |
30 int16_t step2_14, step2_15, step2_16, step2_17, step2_18, step2_19, step2_20; | 31 int16_t step2_14, step2_15, step2_16, step2_17, step2_18, step2_19, step2_20; |
31 int16_t step2_21, step2_22, step2_23, step2_24, step2_25, step2_26, step2_27; | 32 int16_t step2_21, step2_22, step2_23, step2_24, step2_25, step2_26, step2_27; |
32 int16_t step2_28, step2_29, step2_30, step2_31; | 33 int16_t step2_28, step2_29, step2_30, step2_31; |
33 int16_t step3_8, step3_9, step3_10, step3_11, step3_12, step3_13, step3_14; | 34 int16_t step3_8, step3_9, step3_10, step3_11, step3_12, step3_13, step3_14; |
34 int16_t step3_15, step3_16, step3_17, step3_18, step3_19, step3_20, step3_21; | 35 int16_t step3_15, step3_16, step3_17, step3_18, step3_19, step3_20, step3_21; |
35 int16_t step3_22, step3_23, step3_24, step3_25, step3_26, step3_27, step3_28; | 36 int16_t step3_22, step3_23, step3_24, step3_25, step3_26, step3_27, step3_28; |
36 int16_t step3_29, step3_30, step3_31; | 37 int16_t step3_29, step3_30, step3_31; |
37 int temp0, temp1, temp2, temp3; | 38 int temp0, temp1, temp2, temp3; |
38 int load1, load2, load3, load4; | 39 int load1, load2, load3, load4; |
39 int result1, result2; | 40 int result1, result2; |
40 int temp21; | 41 int temp21; |
41 int i; | 42 int i; |
42 const int const_2_power_13 = 8192; | 43 const int const_2_power_13 = 8192; |
43 const int32_t *input_int; | 44 const int32_t *input_int; |
44 | 45 |
45 for (i = 32; i--; ) { | 46 for (i = no_rows; i--; ) { |
46 input_int = (const int32_t *)input; | 47 input_int = (const int32_t *)input; |
47 | 48 |
48 if (!(input_int[0] | input_int[1] | input_int[2] | input_int[3] | | 49 if (!(input_int[0] | input_int[1] | input_int[2] | input_int[3] | |
49 input_int[4] | input_int[5] | input_int[6] | input_int[7] | | 50 input_int[4] | input_int[5] | input_int[6] | input_int[7] | |
50 input_int[8] | input_int[9] | input_int[10] | input_int[11] | | 51 input_int[8] | input_int[9] | input_int[10] | input_int[11] | |
51 input_int[12] | input_int[13] | input_int[14] | input_int[15])) { | 52 input_int[12] | input_int[13] | input_int[14] | input_int[15])) { |
52 input += 32; | 53 input += 32; |
53 | 54 |
54 __asm__ __volatile__ ( | 55 __asm__ __volatile__ ( |
55 "sh $zero, 0(%[output]) \n\t" | 56 "sh $zero, 0(%[output]) \n\t" |
(...skipping 818 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
874 uint32_t pos = 45; | 875 uint32_t pos = 45; |
875 | 876 |
876 /* bit position for extract from acc */ | 877 /* bit position for extract from acc */ |
877 __asm__ __volatile__ ( | 878 __asm__ __volatile__ ( |
878 "wrdsp %[pos], 1 \n\t" | 879 "wrdsp %[pos], 1 \n\t" |
879 : | 880 : |
880 : [pos] "r" (pos) | 881 : [pos] "r" (pos) |
881 ); | 882 ); |
882 | 883 |
883 // Rows | 884 // Rows |
884 idct32_1d_rows_dspr2(input, outptr); | 885 idct32_1d_rows_dspr2(input, outptr, 32); |
885 | 886 |
886 // Columns | 887 // Columns |
887 vp9_idct32_1d_cols_add_blk_dspr2(out, dest, dest_stride); | 888 vp9_idct32_1d_cols_add_blk_dspr2(out, dest, dest_stride); |
888 } | 889 } |
889 | 890 |
| 891 void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest, |
| 892 int stride) { |
| 893 DECLARE_ALIGNED(32, int16_t, out[32 * 32]); |
| 894 int16_t *outptr = out; |
| 895 uint32_t i; |
| 896 uint32_t pos = 45; |
| 897 |
| 898 /* bit positon for extract from acc */ |
| 899 __asm__ __volatile__ ( |
| 900 "wrdsp %[pos], 1 \n\t" |
| 901 : |
| 902 : [pos] "r" (pos) |
| 903 ); |
| 904 |
| 905 // Rows |
| 906 idct32_1d_rows_dspr2(input, outptr, 8); |
| 907 |
| 908 outptr += 8; |
| 909 __asm__ __volatile__ ( |
| 910 "sw $zero, 0(%[outptr]) \n\t" |
| 911 "sw $zero, 4(%[outptr]) \n\t" |
| 912 "sw $zero, 8(%[outptr]) \n\t" |
| 913 "sw $zero, 12(%[outptr]) \n\t" |
| 914 "sw $zero, 16(%[outptr]) \n\t" |
| 915 "sw $zero, 20(%[outptr]) \n\t" |
| 916 "sw $zero, 24(%[outptr]) \n\t" |
| 917 "sw $zero, 28(%[outptr]) \n\t" |
| 918 "sw $zero, 32(%[outptr]) \n\t" |
| 919 "sw $zero, 36(%[outptr]) \n\t" |
| 920 "sw $zero, 40(%[outptr]) \n\t" |
| 921 "sw $zero, 44(%[outptr]) \n\t" |
| 922 |
| 923 : |
| 924 : [outptr] "r" (outptr) |
| 925 ); |
| 926 |
| 927 for (i = 0; i < 31; ++i) { |
| 928 outptr += 32; |
| 929 |
| 930 __asm__ __volatile__ ( |
| 931 "sw $zero, 0(%[outptr]) \n\t" |
| 932 "sw $zero, 4(%[outptr]) \n\t" |
| 933 "sw $zero, 8(%[outptr]) \n\t" |
| 934 "sw $zero, 12(%[outptr]) \n\t" |
| 935 "sw $zero, 16(%[outptr]) \n\t" |
| 936 "sw $zero, 20(%[outptr]) \n\t" |
| 937 "sw $zero, 24(%[outptr]) \n\t" |
| 938 "sw $zero, 28(%[outptr]) \n\t" |
| 939 "sw $zero, 32(%[outptr]) \n\t" |
| 940 "sw $zero, 36(%[outptr]) \n\t" |
| 941 "sw $zero, 40(%[outptr]) \n\t" |
| 942 "sw $zero, 44(%[outptr]) \n\t" |
| 943 |
| 944 : |
| 945 : [outptr] "r" (outptr) |
| 946 ); |
| 947 } |
| 948 |
| 949 // Columns |
| 950 vp9_idct32_1d_cols_add_blk_dspr2(out, dest, stride); |
| 951 } |
| 952 |
890 void vp9_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest, | 953 void vp9_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest, |
891 int stride) { | 954 int stride) { |
892 int r, out; | 955 int r, out; |
893 int32_t a1, absa1; | 956 int32_t a1, absa1; |
894 int32_t vector_a1; | 957 int32_t vector_a1; |
895 int32_t t1, t2, t3, t4; | 958 int32_t t1, t2, t3, t4; |
896 int32_t vector_1, vector_2, vector_3, vector_4; | 959 int32_t vector_1, vector_2, vector_3, vector_4; |
897 uint32_t pos = 45; | 960 uint32_t pos = 45; |
898 | 961 |
899 /* bit position for extract from acc */ | 962 /* bit position for extract from acc */ |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1004 : [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3), [t4] "=&r" (t4), | 1067 : [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3), [t4] "=&r" (t4), |
1005 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2), | 1068 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2), |
1006 [vector_3] "=&r" (vector_3), [vector_4] "=&r" (vector_4), | 1069 [vector_3] "=&r" (vector_3), [vector_4] "=&r" (vector_4), |
1007 [dest] "+&r" (dest) | 1070 [dest] "+&r" (dest) |
1008 : [stride] "r" (stride), [vector_a1] "r" (vector_a1) | 1071 : [stride] "r" (stride), [vector_a1] "r" (vector_a1) |
1009 ); | 1072 ); |
1010 } | 1073 } |
1011 } | 1074 } |
1012 } | 1075 } |
1013 #endif // #if HAVE_DSPR2 | 1076 #endif // #if HAVE_DSPR2 |
OLD | NEW |