OLD | NEW |
1 // Copyright 2014 Google Inc. All Rights Reserved. | 1 // Copyright 2014 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // MIPS version of speed-critical encoding functions. | 10 // MIPS version of speed-critical encoding functions. |
(...skipping 16 matching lines...) Expand all Loading... |
// kC1, kC2: presumably the values behind the %[kC1] / %[kC2] asm operands of the ITransformOne pass macros below (TODO confirm against the asm constraints); there every product with them is followed by an arithmetic shift right by 16. | // kC1, kC2: presumably the values behind the %[kC1] / %[kC2] asm operands of the ITransformOne pass macros below (TODO confirm against the asm constraints); there every product with them is followed by an arithmetic shift right by 16. |
27 static const int kC1 = 20091 + (1 << 16); | 27 static const int kC1 = 20091 + (1 << 16); |
28 static const int kC2 = 35468; | 28 static const int kC2 = 35468; |
29 | 29 |
30 // Macro for one vertical pass in ITransformOne. | 30 // Macro for one vertical pass in ITransformOne. |
31 // MUL macro inlined: each "mul" by kC1 or kC2 is followed by "sra ..., 16". | 31 // MUL macro inlined: each "mul" by kC1 or kC2 is followed by "sra ..., 16". |
32 // temp0..temp15 hold tmp[0]..tmp[15] | 32 // temp0..temp15 hold tmp[0]..tmp[15] |
33 // A..D - offsets in bytes of the four 16-bit values loaded (lh) from the 'in' buffer, addressed through %[temp20] | 33 // A..D - offsets in bytes of the four 16-bit values loaded (lh) from the 'in' buffer, addressed through %[temp20] |
34 // TEMP0..TEMP3 - registers receiving the corresponding tmp elements | 34 // TEMP0..TEMP3 - registers receiving the corresponding tmp elements |
35 // TEMP4 - temporary register (sum of the values at A and B); the macro has no TEMP5 parameter. temp16..temp19 are overwritten as scratch. | 35 // TEMP4 - temporary register (sum of the values at A and B); the macro has no TEMP5 parameter. temp16..temp19 are overwritten as scratch. |
// NOTE(review): the four "sra" lines end in "\n\n" while every other line ends in "\n\t"; presumably this only changes the layout of the emitted assembly text - confirm. | // NOTE(review): the four "sra" lines end in "\n\n" while every other line ends in "\n\t"; presumably this only changes the layout of the emitted assembly text - confirm. |
36 #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \ | 36 #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \ |
37 "lh %[temp16], "#A"(%[temp20]) \n\t" \ | 37 "lh %[temp16], " #A "(%[temp20]) \n\t" \ |
38 "lh %[temp18], "#B"(%[temp20]) \n\t" \ | 38 "lh %[temp18], " #B "(%[temp20]) \n\t" \ |
39 "lh %[temp17], "#C"(%[temp20]) \n\t" \ | 39 "lh %[temp17], " #C "(%[temp20]) \n\t" \ |
40 "lh %[temp19], "#D"(%[temp20]) \n\t" \ | 40 "lh %[temp19], " #D "(%[temp20]) \n\t" \ |
41 "addu %["#TEMP4"], %[temp16], %[temp18] \n\t" \ | 41 "addu %[" #TEMP4 "], %[temp16], %[temp18] \n\t" \ |
42 "subu %[temp16], %[temp16], %[temp18] \n\t" \ | 42 "subu %[temp16], %[temp16], %[temp18] \n\t" \ |
43 "mul %["#TEMP0"], %[temp17], %[kC2] \n\t" \ | 43 "mul %[" #TEMP0 "], %[temp17], %[kC2] \n\t" \ |
44 "mul %[temp18], %[temp19], %[kC1] \n\t" \ | 44 "mul %[temp18], %[temp19], %[kC1] \n\t" \ |
45 "mul %[temp17], %[temp17], %[kC1] \n\t" \ | 45 "mul %[temp17], %[temp17], %[kC1] \n\t" \ |
46 "mul %[temp19], %[temp19], %[kC2] \n\t" \ | 46 "mul %[temp19], %[temp19], %[kC2] \n\t" \ |
47 "sra %["#TEMP0"], %["#TEMP0"], 16 \n\n" \ | 47 "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\n" \ |
48 "sra %[temp18], %[temp18], 16 \n\n" \ | 48 "sra %[temp18], %[temp18], 16 \n\n" \ |
49 "sra %[temp17], %[temp17], 16 \n\n" \ | 49 "sra %[temp17], %[temp17], 16 \n\n" \ |
50 "sra %[temp19], %[temp19], 16 \n\n" \ | 50 "sra %[temp19], %[temp19], 16 \n\n" \ |
51 "subu %["#TEMP2"], %["#TEMP0"], %[temp18] \n\t" \ | 51 "subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp18] \n\t" \ |
52 "addu %["#TEMP3"], %[temp17], %[temp19] \n\t" \ | 52 "addu %[" #TEMP3 "], %[temp17], %[temp19] \n\t" \ |
53 "addu %["#TEMP0"], %["#TEMP4"], %["#TEMP3"] \n\t" \ | 53 "addu %[" #TEMP0 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t" \ |
54 "addu %["#TEMP1"], %[temp16], %["#TEMP2"] \n\t" \ | 54 "addu %[" #TEMP1 "], %[temp16], %[" #TEMP2 "] \n\t" \ |
55 "subu %["#TEMP2"], %[temp16], %["#TEMP2"] \n\t" \ | 55 "subu %[" #TEMP2 "], %[temp16], %[" #TEMP2 "] \n\t" \ |
56 "subu %["#TEMP3"], %["#TEMP4"], %["#TEMP3"] \n\t" | 56 "subu %[" #TEMP3 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t" |
57 | 57 |
58 // Macro for one horizontal pass in ITransformOne. | 58 // Macro for one horizontal pass in ITransformOne. |
59 // MUL and STORE macros inlined: 4 is added to TEMP0 up front and the four results are shifted right by 3 before the bytes loaded from ref are added. | 59 // MUL and STORE macros inlined: 4 is added to TEMP0 up front and the four results are shifted right by 3 before the bytes loaded from ref are added. |
60 // a = clip_8b(a) is replaced with: a = max(a, 0); a = min(a, 255) (done with slt/movn against $zero and slt/movz against 255) | 60 // a = clip_8b(a) is replaced with: a = max(a, 0); a = min(a, 255) (done with slt/movn against $zero and slt/movz against 255) |
61 // temp0..temp15 hold tmp[0]..tmp[15] | 61 // temp0..temp15 hold tmp[0]..tmp[15] |
62 // A..D - offsets in bytes to load from ref and store to dst buffer; the two pointers are fetched from offsets 0 and 8 of %[args] (8 presumably being args[2] with 4-byte pointers - confirm) | 62 // A..D - offsets in bytes to load from ref and store to dst buffer; the two pointers are fetched from offsets 0 and 8 of %[args] (8 presumably being args[2] with 4-byte pointers - confirm) |
63 // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements (inputs, overwritten with the clipped output bytes); temp16..temp20 are overwritten as scratch | 63 // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements (inputs, overwritten with the clipped output bytes); temp16..temp20 are overwritten as scratch |
64 #define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \ | 64 #define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \ |
65 "addiu %["#TEMP0"], %["#TEMP0"], 4 \n\t" \ | 65 "addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \ |
66 "addu %[temp16], %["#TEMP0"], %["#TEMP8"] \n\t" \ | 66 "addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ |
67 "subu %[temp17], %["#TEMP0"], %["#TEMP8"] \n\t" \ | 67 "subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ |
68 "mul %["#TEMP0"], %["#TEMP4"], %[kC2] \n\t" \ | 68 "mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \ |
69 "mul %["#TEMP8"], %["#TEMP12"], %[kC1] \n\t" \ | 69 "mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \ |
70 "mul %["#TEMP4"], %["#TEMP4"], %[kC1] \n\t" \ | 70 "mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \ |
71 "mul %["#TEMP12"], %["#TEMP12"], %[kC2] \n\t" \ | 71 "mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \ |
72 "sra %["#TEMP0"], %["#TEMP0"], 16 \n\t" \ | 72 "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \ |
73 "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \ | 73 "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \ |
74 "sra %["#TEMP4"], %["#TEMP4"], 16 \n\t" \ | 74 "sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \ |
75 "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \ | 75 "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \ |
76 "subu %[temp18], %["#TEMP0"], %["#TEMP8"] \n\t" \ | 76 "subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ |
77 "addu %[temp19], %["#TEMP4"], %["#TEMP12"] \n\t" \ | 77 "addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \ |
78 "addu %["#TEMP0"], %[temp16], %[temp19] \n\t" \ | 78 "addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \ |
79 "addu %["#TEMP4"], %[temp17], %[temp18] \n\t" \ | 79 "addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \ |
80 "subu %["#TEMP8"], %[temp17], %[temp18] \n\t" \ | 80 "subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \ |
81 "subu %["#TEMP12"], %[temp16], %[temp19] \n\t" \ | 81 "subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \ |
82 "lw %[temp20], 0(%[args]) \n\t" \ | 82 "lw %[temp20], 0(%[args]) \n\t" \ |
83 "sra %["#TEMP0"], %["#TEMP0"], 3 \n\t" \ | 83 "sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \ |
84 "sra %["#TEMP4"], %["#TEMP4"], 3 \n\t" \ | 84 "sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \ |
85 "sra %["#TEMP8"], %["#TEMP8"], 3 \n\t" \ | 85 "sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \ |
86 "sra %["#TEMP12"], %["#TEMP12"], 3 \n\t" \ | 86 "sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \ |
87 "lbu %[temp16], "#A"(%[temp20]) \n\t" \ | 87 "lbu %[temp16], " #A "(%[temp20]) \n\t" \ |
88 "lbu %[temp17], "#B"(%[temp20]) \n\t" \ | 88 "lbu %[temp17], " #B "(%[temp20]) \n\t" \ |
89 "lbu %[temp18], "#C"(%[temp20]) \n\t" \ | 89 "lbu %[temp18], " #C "(%[temp20]) \n\t" \ |
90 "lbu %[temp19], "#D"(%[temp20]) \n\t" \ | 90 "lbu %[temp19], " #D "(%[temp20]) \n\t" \ |
91 "addu %["#TEMP0"], %[temp16], %["#TEMP0"] \n\t" \ | 91 "addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \ |
92 "addu %["#TEMP4"], %[temp17], %["#TEMP4"] \n\t" \ | 92 "addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \ |
93 "addu %["#TEMP8"], %[temp18], %["#TEMP8"] \n\t" \ | 93 "addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \ |
94 "addu %["#TEMP12"], %[temp19], %["#TEMP12"] \n\t" \ | 94 "addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \ |
95 "slt %[temp16], %["#TEMP0"], $zero \n\t" \ | 95 "slt %[temp16], %[" #TEMP0 "], $zero \n\t" \ |
96 "slt %[temp17], %["#TEMP4"], $zero \n\t" \ | 96 "slt %[temp17], %[" #TEMP4 "], $zero \n\t" \ |
97 "slt %[temp18], %["#TEMP8"], $zero \n\t" \ | 97 "slt %[temp18], %[" #TEMP8 "], $zero \n\t" \ |
98 "slt %[temp19], %["#TEMP12"], $zero \n\t" \ | 98 "slt %[temp19], %[" #TEMP12 "], $zero \n\t" \ |
99 "movn %["#TEMP0"], $zero, %[temp16] \n\t" \ | 99 "movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \ |
100 "movn %["#TEMP4"], $zero, %[temp17] \n\t" \ | 100 "movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \ |
101 "movn %["#TEMP8"], $zero, %[temp18] \n\t" \ | 101 "movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \ |
102 "movn %["#TEMP12"], $zero, %[temp19] \n\t" \ | 102 "movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \ |
103 "addiu %[temp20], $zero, 255 \n\t" \ | 103 "addiu %[temp20], $zero, 255 \n\t" \ |
104 "slt %[temp16], %["#TEMP0"], %[temp20] \n\t" \ | 104 "slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \ |
105 "slt %[temp17], %["#TEMP4"], %[temp20] \n\t" \ | 105 "slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \ |
106 "slt %[temp18], %["#TEMP8"], %[temp20] \n\t" \ | 106 "slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \ |
107 "slt %[temp19], %["#TEMP12"], %[temp20] \n\t" \ | 107 "slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \ |
108 "movz %["#TEMP0"], %[temp20], %[temp16] \n\t" \ | 108 "movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \ |
109 "movz %["#TEMP4"], %[temp20], %[temp17] \n\t" \ | 109 "movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \ |
110 "lw %[temp16], 8(%[args]) \n\t" \ | 110 "lw %[temp16], 8(%[args]) \n\t" \ |
111 "movz %["#TEMP8"], %[temp20], %[temp18] \n\t" \ | 111 "movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \ |
112 "movz %["#TEMP12"], %[temp20], %[temp19] \n\t" \ | 112 "movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \ |
113 "sb %["#TEMP0"], "#A"(%[temp16]) \n\t" \ | 113 "sb %[" #TEMP0 "], " #A "(%[temp16]) \n\t" \ |
114 "sb %["#TEMP4"], "#B"(%[temp16]) \n\t" \ | 114 "sb %[" #TEMP4 "], " #B "(%[temp16]) \n\t" \ |
115 "sb %["#TEMP8"], "#C"(%[temp16]) \n\t" \ | 115 "sb %[" #TEMP8 "], " #C "(%[temp16]) \n\t" \ |
116 "sb %["#TEMP12"], "#D"(%[temp16]) \n\t" | 116 "sb %[" #TEMP12 "], " #D "(%[temp16]) \n\t" |
117 | 117 |
118 // Does one or two inverse transforms. | 118 // Does one or two inverse transforms. |
119 static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, | 119 static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, |
120 uint8_t* dst) { | 120 uint8_t* dst) { |
121 int temp0, temp1, temp2, temp3, temp4, temp5, temp6; | 121 int temp0, temp1, temp2, temp3, temp4, temp5, temp6; |
122 int temp7, temp8, temp9, temp10, temp11, temp12, temp13; | 122 int temp7, temp8, temp9, temp10, temp11, temp12, temp13; |
123 int temp14, temp15, temp16, temp17, temp18, temp19, temp20; | 123 int temp14, temp15, temp16, temp17, temp18, temp19, temp20; |
124 const int* args[3] = {(const int*)ref, (const int*)in, (const int*)dst}; | 124 const int* args[3] = {(const int*)ref, (const int*)in, (const int*)dst}; |
125 | 125 |
126 __asm__ volatile( | 126 __asm__ volatile( |
(...skipping 30 matching lines...) Expand all Loading... |
157 | 157 |
158 #undef VERTICAL_PASS | 158 #undef VERTICAL_PASS |
159 #undef HORIZONTAL_PASS | 159 #undef HORIZONTAL_PASS |
160 | 160 |
161 // Macro for one pass through the for loop in QuantizeBlock. | 161 // Macro for one pass through the for loop in QuantizeBlock. |
162 // QUANTDIV macro inlined: level = (coeff * value at J in ppiq + value at K in ppbias) >> 17 | 162 // QUANTDIV macro inlined: level = (coeff * value at J in ppiq + value at K in ppbias) >> 17 |
163 // J - offset in bytes (kZigzag[n] * 2), used with ppin, ppsharpen, ppiq and ppq | 163 // J - offset in bytes (kZigzag[n] * 2), used with ppin, ppsharpen, ppiq and ppq |
164 // K - offset in bytes (kZigzag[n] * 4), used with ppzthresh and ppbias | 164 // K - offset in bytes (kZigzag[n] * 4), used with ppzthresh and ppbias |
165 // N - offset in bytes (n * 2), used with pout | 165 // N - offset in bytes (n * 2), used with pout |
// coeff is the absolute value of the input (sign mask from "sra 15", then xor/subu) plus the sharpen value. If coeff is not greater than the zthresh value, the branch to local label 2 leaves temp5 and level at 0, so 0 is stored to both ppin and pout. | // coeff is the absolute value of the input (sign mask from "sra 15", then xor/subu) plus the sharpen value. If coeff is not greater than the zthresh value, the branch to local label 2 leaves temp5 and level at 0, so 0 is stored to both ppin and pout. |
// Otherwise level is capped at max_level, given the input's sign again, stored to pout, and level * (value at J in ppq) is stored back to ppin. | // Otherwise level is capped at max_level, given the input's sign again, stored to pout, and level * (value at J in ppq) is stored back to ppin. |
166 #define QUANTIZE_ONE(J, K, N) \ | 166 #define QUANTIZE_ONE(J, K, N) \ |
167 "lh %[temp0], "#J"(%[ppin]) \n\t" \ | 167 "lh %[temp0], " #J "(%[ppin]) \n\t" \ |
168 "lhu %[temp1], "#J"(%[ppsharpen]) \n\t" \ | 168 "lhu %[temp1], " #J "(%[ppsharpen]) \n\t" \ |
169 "lw %[temp2], "#K"(%[ppzthresh]) \n\t" \ | 169 "lw %[temp2], " #K "(%[ppzthresh]) \n\t" \ |
170 "sra %[sign], %[temp0], 15 \n\t" \ | 170 "sra %[sign], %[temp0], 15 \n\t" \ |
171 "xor %[coeff], %[temp0], %[sign] \n\t" \ | 171 "xor %[coeff], %[temp0], %[sign] \n\t" \ |
172 "subu %[coeff], %[coeff], %[sign] \n\t" \ | 172 "subu %[coeff], %[coeff], %[sign] \n\t" \ |
173 "addu %[coeff], %[coeff], %[temp1] \n\t" \ | 173 "addu %[coeff], %[coeff], %[temp1] \n\t" \ |
174 "slt %[temp4], %[temp2], %[coeff] \n\t" \ | 174 "slt %[temp4], %[temp2], %[coeff] \n\t" \ |
175 "addiu %[temp5], $zero, 0 \n\t" \ | 175 "addiu %[temp5], $zero, 0 \n\t" \ |
176 "addiu %[level], $zero, 0 \n\t" \ | 176 "addiu %[level], $zero, 0 \n\t" \ |
177 "beqz %[temp4], 2f \n\t" \ | 177 "beqz %[temp4], 2f \n\t" \ |
178 "lhu %[temp1], "#J"(%[ppiq]) \n\t" \ | 178 "lhu %[temp1], " #J "(%[ppiq]) \n\t" \ |
179 "lw %[temp2], "#K"(%[ppbias]) \n\t" \ | 179 "lw %[temp2], " #K "(%[ppbias]) \n\t" \ |
180 "lhu %[temp3], "#J"(%[ppq]) \n\t" \ | 180 "lhu %[temp3], " #J "(%[ppq]) \n\t" \ |
181 "mul %[level], %[coeff], %[temp1] \n\t" \ | 181 "mul %[level], %[coeff], %[temp1] \n\t" \ |
182 "addu %[level], %[level], %[temp2] \n\t" \ | 182 "addu %[level], %[level], %[temp2] \n\t" \ |
183 "sra %[level], %[level], 17 \n\t" \ | 183 "sra %[level], %[level], 17 \n\t" \ |
184 "slt %[temp4], %[max_level], %[level] \n\t" \ | 184 "slt %[temp4], %[max_level], %[level] \n\t" \ |
185 "movn %[level], %[max_level], %[temp4] \n\t" \ | 185 "movn %[level], %[max_level], %[temp4] \n\t" \ |
186 "xor %[level], %[level], %[sign] \n\t" \ | 186 "xor %[level], %[level], %[sign] \n\t" \ |
187 "subu %[level], %[level], %[sign] \n\t" \ | 187 "subu %[level], %[level], %[sign] \n\t" \ |
188 "mul %[temp5], %[level], %[temp3] \n\t" \ | 188 "mul %[temp5], %[level], %[temp3] \n\t" \ |
189 "2: \n\t" \ | 189 "2: \n\t" \ |
190 "sh %[temp5], "#J"(%[ppin]) \n\t" \ | 190 "sh %[temp5], " #J "(%[ppin]) \n\t" \ |
191 "sh %[level], "#N"(%[pout]) \n\t" | 191 "sh %[level], " #N "(%[pout]) \n\t" |
192 | 192 |
193 static int QuantizeBlock(int16_t in[16], int16_t out[16], | 193 static int QuantizeBlock(int16_t in[16], int16_t out[16], |
194 const VP8Matrix* const mtx) { | 194 const VP8Matrix* const mtx) { |
195 int temp0, temp1, temp2, temp3, temp4, temp5; | 195 int temp0, temp1, temp2, temp3, temp4, temp5; |
196 int sign, coeff, level, i; | 196 int sign, coeff, level, i; |
197 int max_level = MAX_LEVEL; | 197 int max_level = MAX_LEVEL; |
198 | 198 |
199 int16_t* ppin = &in[0]; | 199 int16_t* ppin = &in[0]; |
200 int16_t* pout = &out[0]; | 200 int16_t* pout = &out[0]; |
201 const uint16_t* ppsharpen = &mtx->sharpen_[0]; | 201 const uint16_t* ppsharpen = &mtx->sharpen_[0]; |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
242 } | 242 } |
243 | 243 |
244 #undef QUANTIZE_ONE | 244 #undef QUANTIZE_ONE |
245 | 245 |
246 // Macro for one horizontal pass in Disto4x4 (TTransform). | 246 // Macro for one horizontal pass in Disto4x4 (TTransform). |
247 // Two calls of function TTransform are merged into a single one: the same butterflies are computed on four bytes of a and on four bytes of b. | 247 // Two calls of function TTransform are merged into a single one: the same butterflies are computed on four bytes of a and on four bytes of b. |
248 // A..D - offsets in bytes to load from a and b buffers (call the loaded bytes x0..x3) | 248 // A..D - offsets in bytes to load from a and b buffers (call the loaded bytes x0..x3) |
249 // E..H - offsets in bytes to store first results (from a) to tmp buffer: E = (x0+x2)+(x1+x3), F = (x0-x2)+(x1-x3), G = (x0-x2)-(x1-x3), H = (x0+x2)-(x1+x3) | 249 // E..H - offsets in bytes to store first results (from a) to tmp buffer: E = (x0+x2)+(x1+x3), F = (x0-x2)+(x1-x3), G = (x0-x2)-(x1-x3), H = (x0+x2)-(x1+x3) |
250 // E1..H1 - offsets in bytes to store second results (from b, same formulas) to tmp buffer; results are stored as 32-bit words (sw). temp0..temp8 are overwritten. | 250 // E1..H1 - offsets in bytes to store second results (from b, same formulas) to tmp buffer; results are stored as 32-bit words (sw). temp0..temp8 are overwritten. |
251 #define HORIZONTAL_PASS(A, B, C, D, E, F, G, H, E1, F1, G1, H1) \ | 251 #define HORIZONTAL_PASS(A, B, C, D, E, F, G, H, E1, F1, G1, H1) \ |
252 "lbu %[temp0], "#A"(%[a]) \n\t" \ | 252 "lbu %[temp0], " #A "(%[a]) \n\t" \ |
253 "lbu %[temp1], "#B"(%[a]) \n\t" \ | 253 "lbu %[temp1], " #B "(%[a]) \n\t" \ |
254 "lbu %[temp2], "#C"(%[a]) \n\t" \ | 254 "lbu %[temp2], " #C "(%[a]) \n\t" \ |
255 "lbu %[temp3], "#D"(%[a]) \n\t" \ | 255 "lbu %[temp3], " #D "(%[a]) \n\t" \ |
256 "lbu %[temp4], "#A"(%[b]) \n\t" \ | 256 "lbu %[temp4], " #A "(%[b]) \n\t" \ |
257 "lbu %[temp5], "#B"(%[b]) \n\t" \ | 257 "lbu %[temp5], " #B "(%[b]) \n\t" \ |
258 "lbu %[temp6], "#C"(%[b]) \n\t" \ | 258 "lbu %[temp6], " #C "(%[b]) \n\t" \ |
259 "lbu %[temp7], "#D"(%[b]) \n\t" \ | 259 "lbu %[temp7], " #D "(%[b]) \n\t" \ |
260 "addu %[temp8], %[temp0], %[temp2] \n\t" \ | 260 "addu %[temp8], %[temp0], %[temp2] \n\t" \ |
261 "subu %[temp0], %[temp0], %[temp2] \n\t" \ | 261 "subu %[temp0], %[temp0], %[temp2] \n\t" \ |
262 "addu %[temp2], %[temp1], %[temp3] \n\t" \ | 262 "addu %[temp2], %[temp1], %[temp3] \n\t" \ |
263 "subu %[temp1], %[temp1], %[temp3] \n\t" \ | 263 "subu %[temp1], %[temp1], %[temp3] \n\t" \ |
264 "addu %[temp3], %[temp4], %[temp6] \n\t" \ | 264 "addu %[temp3], %[temp4], %[temp6] \n\t" \ |
265 "subu %[temp4], %[temp4], %[temp6] \n\t" \ | 265 "subu %[temp4], %[temp4], %[temp6] \n\t" \ |
266 "addu %[temp6], %[temp5], %[temp7] \n\t" \ | 266 "addu %[temp6], %[temp5], %[temp7] \n\t" \ |
267 "subu %[temp5], %[temp5], %[temp7] \n\t" \ | 267 "subu %[temp5], %[temp5], %[temp7] \n\t" \ |
268 "addu %[temp7], %[temp8], %[temp2] \n\t" \ | 268 "addu %[temp7], %[temp8], %[temp2] \n\t" \ |
269 "subu %[temp2], %[temp8], %[temp2] \n\t" \ | 269 "subu %[temp2], %[temp8], %[temp2] \n\t" \ |
270 "addu %[temp8], %[temp0], %[temp1] \n\t" \ | 270 "addu %[temp8], %[temp0], %[temp1] \n\t" \ |
271 "subu %[temp0], %[temp0], %[temp1] \n\t" \ | 271 "subu %[temp0], %[temp0], %[temp1] \n\t" \ |
272 "addu %[temp1], %[temp3], %[temp6] \n\t" \ | 272 "addu %[temp1], %[temp3], %[temp6] \n\t" \ |
273 "subu %[temp3], %[temp3], %[temp6] \n\t" \ | 273 "subu %[temp3], %[temp3], %[temp6] \n\t" \ |
274 "addu %[temp6], %[temp4], %[temp5] \n\t" \ | 274 "addu %[temp6], %[temp4], %[temp5] \n\t" \ |
275 "subu %[temp4], %[temp4], %[temp5] \n\t" \ | 275 "subu %[temp4], %[temp4], %[temp5] \n\t" \ |
276 "sw %[temp7], "#E"(%[tmp]) \n\t" \ | 276 "sw %[temp7], " #E "(%[tmp]) \n\t" \ |
277 "sw %[temp2], "#H"(%[tmp]) \n\t" \ | 277 "sw %[temp2], " #H "(%[tmp]) \n\t" \ |
278 "sw %[temp8], "#F"(%[tmp]) \n\t" \ | 278 "sw %[temp8], " #F "(%[tmp]) \n\t" \ |
279 "sw %[temp0], "#G"(%[tmp]) \n\t" \ | 279 "sw %[temp0], " #G "(%[tmp]) \n\t" \ |
280 "sw %[temp1], "#E1"(%[tmp]) \n\t" \ | 280 "sw %[temp1], " #E1 "(%[tmp]) \n\t" \ |
281 "sw %[temp3], "#H1"(%[tmp]) \n\t" \ | 281 "sw %[temp3], " #H1 "(%[tmp]) \n\t" \ |
282 "sw %[temp6], "#F1"(%[tmp]) \n\t" \ | 282 "sw %[temp6], " #F1 "(%[tmp]) \n\t" \ |
283 "sw %[temp4], "#G1"(%[tmp]) \n\t" | 283 "sw %[temp4], " #G1 "(%[tmp]) \n\t" |
284 | 284 |
285 // macro for one vertical pass in Disto4x4 (TTransform) | 285 // macro for one vertical pass in Disto4x4 (TTransform) |
286 // two calls of function TTransform are merged into single one | 286 // two calls of function TTransform are merged into single one |
287 // since only one accumulator is available in the mips32r1 instruction set, | 287 // since only one accumulator is available in the mips32r1 instruction set, |
288 // the second call of function TTransform is done first and the first | 288 // the second call of function TTransform is done first and the first |
289 // call after it. | 289 // call after it. |
290 // const int sum1 = TTransform(a, w); | 290 // const int sum1 = TTransform(a, w); |
291 // const int sum2 = TTransform(b, w); | 291 // const int sum2 = TTransform(b, w); |
292 // return abs(sum2 - sum1) >> 5; | 292 // return abs(sum2 - sum1) >> 5; |
293 // (sum2 - sum1) is calculated with madds (sum2) and msubs (sum1) | 293 // (sum2 - sum1) is calculated with madds (sum2) and msubs (sum1) |
294 // A..D - offsets in bytes to load first results from tmp buffer | 294 // A..D - offsets in bytes to load first results from tmp buffer |
295 // A1..D1 - offsets in bytes to load second results from tmp buffer | 295 // A1..D1 - offsets in bytes to load second results from tmp buffer |
296 // E..H - offsets in bytes to load from w buffer | 296 // E..H - offsets in bytes to load from w buffer |
297 #define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H) \ | 297 #define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H) \ |
298 "lw %[temp0], "#A1"(%[tmp]) \n\t" \ | 298 "lw %[temp0], " #A1 "(%[tmp]) \n\t" \ |
299 "lw %[temp1], "#C1"(%[tmp]) \n\t" \ | 299 "lw %[temp1], " #C1 "(%[tmp]) \n\t" \ |
300 "lw %[temp2], "#B1"(%[tmp]) \n\t" \ | 300 "lw %[temp2], " #B1 "(%[tmp]) \n\t" \ |
301 "lw %[temp3], "#D1"(%[tmp]) \n\t" \ | 301 "lw %[temp3], " #D1 "(%[tmp]) \n\t" \ |
302 "addu %[temp8], %[temp0], %[temp1] \n\t" \ | 302 "addu %[temp8], %[temp0], %[temp1] \n\t" \ |
303 "subu %[temp0], %[temp0], %[temp1] \n\t" \ | 303 "subu %[temp0], %[temp0], %[temp1] \n\t" \ |
304 "addu %[temp1], %[temp2], %[temp3] \n\t" \ | 304 "addu %[temp1], %[temp2], %[temp3] \n\t" \ |
305 "subu %[temp2], %[temp2], %[temp3] \n\t" \ | 305 "subu %[temp2], %[temp2], %[temp3] \n\t" \ |
306 "addu %[temp3], %[temp8], %[temp1] \n\t" \ | 306 "addu %[temp3], %[temp8], %[temp1] \n\t" \ |
307 "subu %[temp8], %[temp8], %[temp1] \n\t" \ | 307 "subu %[temp8], %[temp8], %[temp1] \n\t" \ |
308 "addu %[temp1], %[temp0], %[temp2] \n\t" \ | 308 "addu %[temp1], %[temp0], %[temp2] \n\t" \ |
309 "subu %[temp0], %[temp0], %[temp2] \n\t" \ | 309 "subu %[temp0], %[temp0], %[temp2] \n\t" \ |
310 "sra %[temp4], %[temp3], 31 \n\t" \ | 310 "sra %[temp4], %[temp3], 31 \n\t" \ |
311 "sra %[temp5], %[temp1], 31 \n\t" \ | 311 "sra %[temp5], %[temp1], 31 \n\t" \ |
312 "sra %[temp6], %[temp0], 31 \n\t" \ | 312 "sra %[temp6], %[temp0], 31 \n\t" \ |
313 "sra %[temp7], %[temp8], 31 \n\t" \ | 313 "sra %[temp7], %[temp8], 31 \n\t" \ |
314 "xor %[temp3], %[temp3], %[temp4] \n\t" \ | 314 "xor %[temp3], %[temp3], %[temp4] \n\t" \ |
315 "xor %[temp1], %[temp1], %[temp5] \n\t" \ | 315 "xor %[temp1], %[temp1], %[temp5] \n\t" \ |
316 "xor %[temp0], %[temp0], %[temp6] \n\t" \ | 316 "xor %[temp0], %[temp0], %[temp6] \n\t" \ |
317 "xor %[temp8], %[temp8], %[temp7] \n\t" \ | 317 "xor %[temp8], %[temp8], %[temp7] \n\t" \ |
318 "subu %[temp3], %[temp3], %[temp4] \n\t" \ | 318 "subu %[temp3], %[temp3], %[temp4] \n\t" \ |
319 "subu %[temp1], %[temp1], %[temp5] \n\t" \ | 319 "subu %[temp1], %[temp1], %[temp5] \n\t" \ |
320 "subu %[temp0], %[temp0], %[temp6] \n\t" \ | 320 "subu %[temp0], %[temp0], %[temp6] \n\t" \ |
321 "subu %[temp8], %[temp8], %[temp7] \n\t" \ | 321 "subu %[temp8], %[temp8], %[temp7] \n\t" \ |
322 "lhu %[temp4], "#E"(%[w]) \n\t" \ | 322 "lhu %[temp4], " #E "(%[w]) \n\t" \ |
323 "lhu %[temp5], "#F"(%[w]) \n\t" \ | 323 "lhu %[temp5], " #F "(%[w]) \n\t" \ |
324 "lhu %[temp6], "#G"(%[w]) \n\t" \ | 324 "lhu %[temp6], " #G "(%[w]) \n\t" \ |
325 "lhu %[temp7], "#H"(%[w]) \n\t" \ | 325 "lhu %[temp7], " #H "(%[w]) \n\t" \ |
326 "madd %[temp4], %[temp3] \n\t" \ | 326 "madd %[temp4], %[temp3] \n\t" \ |
327 "madd %[temp5], %[temp1] \n\t" \ | 327 "madd %[temp5], %[temp1] \n\t" \ |
328 "madd %[temp6], %[temp0] \n\t" \ | 328 "madd %[temp6], %[temp0] \n\t" \ |
329 "madd %[temp7], %[temp8] \n\t" \ | 329 "madd %[temp7], %[temp8] \n\t" \ |
330 "lw %[temp0], "#A"(%[tmp]) \n\t" \ | 330 "lw %[temp0], " #A "(%[tmp]) \n\t" \ |
331 "lw %[temp1], "#C"(%[tmp]) \n\t" \ | 331 "lw %[temp1], " #C "(%[tmp]) \n\t" \ |
332 "lw %[temp2], "#B"(%[tmp]) \n\t" \ | 332 "lw %[temp2], " #B "(%[tmp]) \n\t" \ |
333 "lw %[temp3], "#D"(%[tmp]) \n\t" \ | 333 "lw %[temp3], " #D "(%[tmp]) \n\t" \ |
334 "addu %[temp8], %[temp0], %[temp1] \n\t" \ | 334 "addu %[temp8], %[temp0], %[temp1] \n\t" \ |
335 "subu %[temp0], %[temp0], %[temp1] \n\t" \ | 335 "subu %[temp0], %[temp0], %[temp1] \n\t" \ |
336 "addu %[temp1], %[temp2], %[temp3] \n\t" \ | 336 "addu %[temp1], %[temp2], %[temp3] \n\t" \ |
337 "subu %[temp2], %[temp2], %[temp3] \n\t" \ | 337 "subu %[temp2], %[temp2], %[temp3] \n\t" \ |
338 "addu %[temp3], %[temp8], %[temp1] \n\t" \ | 338 "addu %[temp3], %[temp8], %[temp1] \n\t" \ |
339 "subu %[temp1], %[temp8], %[temp1] \n\t" \ | 339 "subu %[temp1], %[temp8], %[temp1] \n\t" \ |
340 "addu %[temp8], %[temp0], %[temp2] \n\t" \ | 340 "addu %[temp8], %[temp0], %[temp2] \n\t" \ |
341 "subu %[temp0], %[temp0], %[temp2] \n\t" \ | 341 "subu %[temp0], %[temp0], %[temp2] \n\t" \ |
342 "sra %[temp2], %[temp3], 31 \n\t" \ | 342 "sra %[temp2], %[temp3], 31 \n\t" \ |
343 "xor %[temp3], %[temp3], %[temp2] \n\t" \ | 343 "xor %[temp3], %[temp3], %[temp2] \n\t" \ |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
400 D += Disto4x4(a + x + y, b + x + y, w); | 400 D += Disto4x4(a + x + y, b + x + y, w); |
401 } | 401 } |
402 } | 402 } |
403 return D; | 403 return D; |
404 } | 404 } |
405 | 405 |
406 // Macro for one horizontal pass in FTransform. | 406 // Macro for one horizontal pass in FTransform. |
407 // temp0..temp15 hold tmp[0]..tmp[15] | 407 // temp0..temp15 hold tmp[0]..tmp[15] |
408 // A..D - offsets in bytes to load from src and ref buffers; the two pointers are fetched from offsets 0 and 4 of %[args]. Let d0..d3 be the (src - ref) byte differences at A..D, a0 = d0 + d3, a1 = d1 + d2, a2 = d1 - d2, a3 = d0 - d3. | 408 // A..D - offsets in bytes to load from src and ref buffers; the two pointers are fetched from offsets 0 and 4 of %[args]. Let d0..d3 be the (src - ref) byte differences at A..D, a0 = d0 + d3, a1 = d1 + d2, a2 = d1 - d2, a3 = d0 - d3. |
409 // TEMP0..TEMP3 - registers for corresponding tmp elements: TEMP0 = (a0 + a1) << 3, TEMP1 = (a2 * c2217 + a3 * c5352 + 1812) >> 9, TEMP2 = (a0 - a1) << 3, TEMP3 = (a3 * c2217 - a2 * c5352 + 937) >> 9 | 409 // TEMP0..TEMP3 - registers for corresponding tmp elements: TEMP0 = (a0 + a1) << 3, TEMP1 = (a2 * c2217 + a3 * c5352 + 1812) >> 9, TEMP2 = (a0 - a1) << 3, TEMP3 = (a3 * c2217 - a2 * c5352 + 937) >> 9 |
// TEMP1 and TEMP2 first serve as the src and ref pointer registers before receiving their results; temp16..temp20 are overwritten as scratch. | // TEMP1 and TEMP2 first serve as the src and ref pointer registers before receiving their results; temp16..temp20 are overwritten as scratch. |
410 #define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP1, TEMP2, TEMP3) \ | 410 #define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP1, TEMP2, TEMP3) \ |
411 "lw %["#TEMP1"], 0(%[args]) \n\t" \ | 411 "lw %[" #TEMP1 "], 0(%[args]) \n\t" \ |
412 "lw %["#TEMP2"], 4(%[args]) \n\t" \ | 412 "lw %[" #TEMP2 "], 4(%[args]) \n\t" \ |
413 "lbu %[temp16], "#A"(%["#TEMP1"]) \n\t" \ | 413 "lbu %[temp16], " #A "(%[" #TEMP1 "]) \n\t" \ |
414 "lbu %[temp17], "#A"(%["#TEMP2"]) \n\t" \ | 414 "lbu %[temp17], " #A "(%[" #TEMP2 "]) \n\t" \ |
415 "lbu %[temp18], "#B"(%["#TEMP1"]) \n\t" \ | 415 "lbu %[temp18], " #B "(%[" #TEMP1 "]) \n\t" \ |
416 "lbu %[temp19], "#B"(%["#TEMP2"]) \n\t" \ | 416 "lbu %[temp19], " #B "(%[" #TEMP2 "]) \n\t" \ |
417 "subu %[temp20], %[temp16], %[temp17] \n\t" \ | 417 "subu %[temp20], %[temp16], %[temp17] \n\t" \ |
418 "lbu %[temp16], "#C"(%["#TEMP1"]) \n\t" \ | 418 "lbu %[temp16], " #C "(%[" #TEMP1 "]) \n\t" \ |
419 "lbu %[temp17], "#C"(%["#TEMP2"]) \n\t" \ | 419 "lbu %[temp17], " #C "(%[" #TEMP2 "]) \n\t" \ |
420 "subu %["#TEMP0"], %[temp18], %[temp19] \n\t" \ | 420 "subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \ |
421 "lbu %[temp18], "#D"(%["#TEMP1"]) \n\t" \ | 421 "lbu %[temp18], " #D "(%[" #TEMP1 "]) \n\t" \ |
422 "lbu %[temp19], "#D"(%["#TEMP2"]) \n\t" \ | 422 "lbu %[temp19], " #D "(%[" #TEMP2 "]) \n\t" \ |
423 "subu %["#TEMP1"], %[temp16], %[temp17] \n\t" \ | 423 "subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \ |
424 "subu %["#TEMP2"], %[temp18], %[temp19] \n\t" \ | 424 "subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \ |
425 "addu %["#TEMP3"], %[temp20], %["#TEMP2"] \n\t" \ | 425 "addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \ |
426 "subu %["#TEMP2"], %[temp20], %["#TEMP2"] \n\t" \ | 426 "subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \ |
427 "addu %[temp20], %["#TEMP0"], %["#TEMP1"] \n\t" \ | 427 "addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \ |
428 "subu %["#TEMP0"], %["#TEMP0"], %["#TEMP1"] \n\t" \ | 428 "subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \ |
429 "mul %[temp16], %["#TEMP2"], %[c5352] \n\t" \ | 429 "mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \ |
430 "mul %[temp17], %["#TEMP2"], %[c2217] \n\t" \ | 430 "mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \ |
431 "mul %[temp18], %["#TEMP0"], %[c5352] \n\t" \ | 431 "mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \ |
432 "mul %[temp19], %["#TEMP0"], %[c2217] \n\t" \ | 432 "mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \ |
433 "addu %["#TEMP1"], %["#TEMP3"], %[temp20] \n\t" \ | 433 "addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \ |
434 "subu %[temp20], %["#TEMP3"], %[temp20] \n\t" \ | 434 "subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \ |
435 "sll %["#TEMP0"], %["#TEMP1"], 3 \n\t" \ | 435 "sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \ |
436 "sll %["#TEMP2"], %[temp20], 3 \n\t" \ | 436 "sll %[" #TEMP2 "], %[temp20], 3 \n\t" \ |
437 "addiu %[temp16], %[temp16], 1812 \n\t" \ | 437 "addiu %[temp16], %[temp16], 1812 \n\t" \ |
438 "addiu %[temp17], %[temp17], 937 \n\t" \ | 438 "addiu %[temp17], %[temp17], 937 \n\t" \ |
439 "addu %[temp16], %[temp16], %[temp19] \n\t" \ | 439 "addu %[temp16], %[temp16], %[temp19] \n\t" \ |
440 "subu %[temp17], %[temp17], %[temp18] \n\t" \ | 440 "subu %[temp17], %[temp17], %[temp18] \n\t" \ |
441 "sra %["#TEMP1"], %[temp16], 9 \n\t" \ | 441 "sra %[" #TEMP1 "], %[temp16], 9 \n\t" \ |
442 "sra %["#TEMP3"], %[temp17], 9 \n\t" | 442 "sra %[" #TEMP3 "], %[temp17], 9 \n\t" |
443 | 443 |
444 // Macro for one vertical pass in FTransform. | 444 // Macro for one vertical pass in FTransform. |
445 // temp0..temp15 hold tmp[0]..tmp[15] | 445 // temp0..temp15 hold tmp[0]..tmp[15] |
446 // A..D - offsets in bytes to store (sh) to out buffer, addressed through %[temp20]. With a0 = TEMP0 + TEMP12, a1 = TEMP4 + TEMP8, a2 = TEMP4 - TEMP8, a3 = TEMP0 - TEMP12: A = (a0 + a1 + 7) >> 4, B = ((a2 * c2217 + a3 * c5352 + 12000) >> 16) + (a3 != 0), C = (a0 - a1 + 7) >> 4, D = (a3 * c2217 - a2 * c5352 + 51000) >> 16 | 446 // A..D - offsets in bytes to store (sh) to out buffer, addressed through %[temp20]. With a0 = TEMP0 + TEMP12, a1 = TEMP4 + TEMP8, a2 = TEMP4 - TEMP8, a3 = TEMP0 - TEMP12: A = (a0 + a1 + 7) >> 4, B = ((a2 * c2217 + a3 * c5352 + 12000) >> 16) + (a3 != 0), C = (a0 - a1 + 7) >> 4, D = (a3 * c2217 - a2 * c5352 + 51000) >> 16 |
447 // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements (inputs, overwritten with the values stored at A, C, D and B respectively); temp16..temp19 are overwritten as scratch | 447 // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements (inputs, overwritten with the values stored at A, C, D and B respectively); temp16..temp19 are overwritten as scratch |
// The 51000 bias is added in two steps (30000, then 21000), presumably because it does not fit addiu's 16-bit signed immediate - confirm. | // The 51000 bias is added in two steps (30000, then 21000), presumably because it does not fit addiu's 16-bit signed immediate - confirm. |
448 #define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \ | 448 #define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \ |
449 "addu %[temp16], %["#TEMP0"], %["#TEMP12"] \n\t" \ | 449 "addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \ |
450 "subu %[temp19], %["#TEMP0"], %["#TEMP12"] \n\t" \ | 450 "subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \ |
451 "addu %[temp17], %["#TEMP4"], %["#TEMP8"] \n\t" \ | 451 "addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \ |
452 "subu %[temp18], %["#TEMP4"], %["#TEMP8"] \n\t" \ | 452 "subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \ |
453 "mul %["#TEMP8"], %[temp19], %[c2217] \n\t" \ | 453 "mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \ |
454 "mul %["#TEMP12"], %[temp18], %[c2217] \n\t" \ | 454 "mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \ |
455 "mul %["#TEMP4"], %[temp19], %[c5352] \n\t" \ | 455 "mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \ |
456 "mul %[temp18], %[temp18], %[c5352] \n\t" \ | 456 "mul %[temp18], %[temp18], %[c5352] \n\t" \ |
457 "addiu %[temp16], %[temp16], 7 \n\t" \ | 457 "addiu %[temp16], %[temp16], 7 \n\t" \ |
458 "addu %["#TEMP0"], %[temp16], %[temp17] \n\t" \ | 458 "addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \ |
459 "sra %["#TEMP0"], %["#TEMP0"], 4 \n\t" \ | 459 "sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \ |
460 "addu %["#TEMP12"], %["#TEMP12"], %["#TEMP4"] \n\t" \ | 460 "addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \ |
461 "subu %["#TEMP4"], %[temp16], %[temp17] \n\t" \ | 461 "subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \ |
462 "sra %["#TEMP4"], %["#TEMP4"], 4 \n\t" \ | 462 "sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \ |
463 "addiu %["#TEMP8"], %["#TEMP8"], 30000 \n\t" \ | 463 "addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \ |
464 "addiu %["#TEMP12"], %["#TEMP12"], 12000 \n\t" \ | 464 "addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \ |
465 "addiu %["#TEMP8"], %["#TEMP8"], 21000 \n\t" \ | 465 "addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \ |
466 "subu %["#TEMP8"], %["#TEMP8"], %[temp18] \n\t" \ | 466 "subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \ |
467 "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \ | 467 "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \ |
468 "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \ | 468 "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \ |
469 "addiu %[temp16], %["#TEMP12"], 1 \n\t" \ | 469 "addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \ |
470 "movn %["#TEMP12"], %[temp16], %[temp19] \n\t" \ | 470 "movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \ |
471 "sh %["#TEMP0"], "#A"(%[temp20]) \n\t" \ | 471 "sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \ |
472 "sh %["#TEMP4"], "#C"(%[temp20]) \n\t" \ | 472 "sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \ |
473 "sh %["#TEMP8"], "#D"(%[temp20]) \n\t" \ | 473 "sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \ |
474 "sh %["#TEMP12"], "#B"(%[temp20]) \n\t" | 474 "sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t" |
475 | 475 |
476 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { | 476 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { |
477 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; | 477 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; |
478 int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16; | 478 int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16; |
479 int temp17, temp18, temp19, temp20; | 479 int temp17, temp18, temp19, temp20; |
480 const int c2217 = 2217; | 480 const int c2217 = 2217; |
481 const int c5352 = 5352; | 481 const int c5352 = 5352; |
482 const int* const args[3] = | 482 const int* const args[3] = |
483 { (const int*)src, (const int*)ref, (const int*)out }; | 483 { (const int*)src, (const int*)ref, (const int*)out }; |
484 | 484 |
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
615 const int b = VP8EncBands[n + 1]; | 615 const int b = VP8EncBands[n + 1]; |
616 const int ctx = (v == 1) ? 1 : 2; | 616 const int ctx = (v == 1) ? 1 : 2; |
617 const int last_p0 = res->prob[b][ctx][0]; | 617 const int last_p0 = res->prob[b][ctx][0]; |
618 cost += VP8BitCost(0, last_p0); | 618 cost += VP8BitCost(0, last_p0); |
619 } | 619 } |
620 } | 620 } |
621 return cost; | 621 return cost; |
622 } | 622 } |
623 | 623 |
// Macro for four byte positions of a sum of squared differences: loads the bytes at offsets A..D from a and from b, subtracts b's byte from a's, and feeds each difference squared to madd. | // Macro for four byte positions of a sum of squared differences: loads the bytes at offsets A..D from a and from b, subtracts b's byte from a's, and feeds each difference squared to madd. |
// A..D - offsets in bytes to load from a and b buffers; temp0..temp7 are overwritten. | // A..D - offsets in bytes to load from a and b buffers; temp0..temp7 are overwritten. |
624 #define GET_SSE_INNER(A, B, C, D) \ | 624 #define GET_SSE_INNER(A, B, C, D) \ |
625 "lbu %[temp0], "#A"(%[a]) \n\t" \ | 625 "lbu %[temp0], " #A "(%[a]) \n\t" \ |
626 "lbu %[temp1], "#A"(%[b]) \n\t" \ | 626 "lbu %[temp1], " #A "(%[b]) \n\t" \ |
627 "lbu %[temp2], "#B"(%[a]) \n\t" \ | 627 "lbu %[temp2], " #B "(%[a]) \n\t" \ |
628 "lbu %[temp3], "#B"(%[b]) \n\t" \ | 628 "lbu %[temp3], " #B "(%[b]) \n\t" \ |
629 "lbu %[temp4], "#C"(%[a]) \n\t" \ | 629 "lbu %[temp4], " #C "(%[a]) \n\t" \ |
630 "lbu %[temp5], "#C"(%[b]) \n\t" \ | 630 "lbu %[temp5], " #C "(%[b]) \n\t" \ |
631 "lbu %[temp6], "#D"(%[a]) \n\t" \ | 631 "lbu %[temp6], " #D "(%[a]) \n\t" \ |
632 "lbu %[temp7], "#D"(%[b]) \n\t" \ | 632 "lbu %[temp7], " #D "(%[b]) \n\t" \ |
633 "subu %[temp0], %[temp0], %[temp1] \n\t" \ | 633 "subu %[temp0], %[temp0], %[temp1] \n\t" \ |
634 "subu %[temp2], %[temp2], %[temp3] \n\t" \ | 634 "subu %[temp2], %[temp2], %[temp3] \n\t" \ |
635 "subu %[temp4], %[temp4], %[temp5] \n\t" \ | 635 "subu %[temp4], %[temp4], %[temp5] \n\t" \ |
636 "subu %[temp6], %[temp6], %[temp7] \n\t" \ | 636 "subu %[temp6], %[temp6], %[temp7] \n\t" \ |
637 "madd %[temp0], %[temp0] \n\t" \ | 637 "madd %[temp0], %[temp0] \n\t" \ |
638 "madd %[temp2], %[temp2] \n\t" \ | 638 "madd %[temp2], %[temp2] \n\t" \ |
639 "madd %[temp4], %[temp4] \n\t" \ | 639 "madd %[temp4], %[temp4] \n\t" \ |
640 "madd %[temp6], %[temp6] \n\t" | 640 "madd %[temp6], %[temp6] \n\t" |
641 | 641 |
642 #define GET_SSE(A, B, C, D) \ | 642 #define GET_SSE(A, B, C, D) \ |
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
767 VP8TDisto16x16 = Disto16x16; | 767 VP8TDisto16x16 = Disto16x16; |
768 VP8FTransform = FTransform; | 768 VP8FTransform = FTransform; |
769 #if !defined(WORK_AROUND_GCC) | 769 #if !defined(WORK_AROUND_GCC) |
770 VP8SSE16x16 = SSE16x16; | 770 VP8SSE16x16 = SSE16x16; |
771 VP8SSE8x8 = SSE8x8; | 771 VP8SSE8x8 = SSE8x8; |
772 VP8SSE16x8 = SSE16x8; | 772 VP8SSE16x8 = SSE16x8; |
773 VP8SSE4x4 = SSE4x4; | 773 VP8SSE4x4 = SSE4x4; |
774 #endif | 774 #endif |
775 #endif // WEBP_USE_MIPS32 | 775 #endif // WEBP_USE_MIPS32 |
776 } | 776 } |
OLD | NEW |