Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(267)

Side by Side Diff: source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12
13 #include "./vpx_config.h"
14 #include "./vp9_rtcd.h"
15 #include "vp9/common/vp9_common.h"
16 #include "vp9/common/vp9_blockd.h"
17 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
18 #include "vpx_dsp/txfm_common.h"
19 #include "vpx_ports/mem.h"
20
21 #if HAVE_DSPR2
22 void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
23 int dest_stride) {
24 int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
25 int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
26 int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19;
27 int16_t step1_20, step1_21, step1_22, step1_23, step1_24, step1_25, step1_26;
28 int16_t step1_27, step1_28, step1_29, step1_30, step1_31;
29 int16_t step2_0, step2_1, step2_2, step2_3, step2_4, step2_5, step2_6;
30 int16_t step2_7, step2_8, step2_9, step2_10, step2_11, step2_12, step2_13;
31 int16_t step2_14, step2_15, step2_16, step2_17, step2_18, step2_19, step2_20;
32 int16_t step2_21, step2_22, step2_23, step2_24, step2_25, step2_26, step2_27;
33 int16_t step2_28, step2_29, step2_30, step2_31;
34 int16_t step3_8, step3_9, step3_10, step3_11, step3_12, step3_13, step3_14;
35 int16_t step3_15, step3_16, step3_17, step3_18, step3_19, step3_20, step3_21;
36 int16_t step3_22, step3_23, step3_24, step3_25, step3_26, step3_27;
37 int16_t step3_28, step3_29, step3_30, step3_31;
38 int temp0, temp1, temp2, temp3;
39 int load1, load2, load3, load4;
40 int result1, result2;
41 int i, temp21;
42 uint8_t *dest_pix, *dest_pix1;
43 const int const_2_power_13 = 8192;
44 uint8_t *cm = vpx_ff_cropTbl;
45
46 /* prefetch vpx_ff_cropTbl */
47 prefetch_load(vpx_ff_cropTbl);
48 prefetch_load(vpx_ff_cropTbl + 32);
49 prefetch_load(vpx_ff_cropTbl + 64);
50 prefetch_load(vpx_ff_cropTbl + 96);
51 prefetch_load(vpx_ff_cropTbl + 128);
52 prefetch_load(vpx_ff_cropTbl + 160);
53 prefetch_load(vpx_ff_cropTbl + 192);
54 prefetch_load(vpx_ff_cropTbl + 224);
55
56 for (i = 0; i < 32; ++i) {
57 dest_pix = dest + i;
58 dest_pix1 = dest + i + 31 * dest_stride;
59
60 __asm__ __volatile__ (
61 "lh %[load1], 2(%[input]) \n\t"
62 "lh %[load2], 62(%[input]) \n\t"
63 "lh %[load3], 34(%[input]) \n\t"
64 "lh %[load4], 30(%[input]) \n\t"
65
66 "mtlo %[const_2_power_13], $ac1 \n\t"
67 "mthi $zero, $ac1 \n\t"
68 "mtlo %[const_2_power_13], $ac3 \n\t"
69 "mthi $zero, $ac3 \n\t"
70
71 "madd $ac1, %[load1], %[cospi_31_64] \n\t"
72 "msub $ac1, %[load2], %[cospi_1_64] \n\t"
73 "extp %[temp0], $ac1, 31 \n\t"
74
75 "madd $ac3, %[load1], %[cospi_1_64] \n\t"
76 "madd $ac3, %[load2], %[cospi_31_64] \n\t"
77 "extp %[temp3], $ac3, 31 \n\t"
78
79 "mtlo %[const_2_power_13], $ac1 \n\t"
80 "mthi $zero, $ac1 \n\t"
81 "mtlo %[const_2_power_13], $ac2 \n\t"
82 "mthi $zero, $ac2 \n\t"
83
84 "madd $ac2, %[load3], %[cospi_15_64] \n\t"
85 "msub $ac2, %[load4], %[cospi_17_64] \n\t"
86 "extp %[temp1], $ac2, 31 \n\t"
87
88 "madd $ac1, %[load3], %[cospi_17_64] \n\t"
89 "madd $ac1, %[load4], %[cospi_15_64] \n\t"
90 "extp %[temp2], $ac1, 31 \n\t"
91
92 "mtlo %[const_2_power_13], $ac1 \n\t"
93 "mthi $zero, $ac1 \n\t"
94 "mtlo %[const_2_power_13], $ac3 \n\t"
95 "mthi $zero, $ac3 \n\t"
96
97 "sub %[load1], %[temp3], %[temp2] \n\t"
98 "sub %[load2], %[temp0], %[temp1] \n\t"
99
100 "madd $ac1, %[load1], %[cospi_28_64] \n\t"
101 "msub $ac1, %[load2], %[cospi_4_64] \n\t"
102 "madd $ac3, %[load1], %[cospi_4_64] \n\t"
103 "madd $ac3, %[load2], %[cospi_28_64] \n\t"
104
105 "extp %[step1_17], $ac1, 31 \n\t"
106 "extp %[step1_30], $ac3, 31 \n\t"
107 "add %[step1_16], %[temp0], %[temp1] \n\t"
108 "add %[step1_31], %[temp2], %[temp3] \n\t"
109
110 : [load1] "=&r" (load1), [load2] "=&r" (load2), [load3] "=&r" (load3),
111 [load4] "=&r" (load4), [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
112 [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
113 [step1_16] "=r" (step1_16), [step1_17] "=r" (step1_17),
114 [step1_30] "=r" (step1_30), [step1_31] "=r" (step1_31)
115 : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
116 [cospi_31_64] "r" (cospi_31_64), [cospi_1_64] "r" (cospi_1_64),
117 [cospi_4_64] "r" (cospi_4_64), [cospi_17_64] "r" (cospi_17_64),
118 [cospi_15_64] "r" (cospi_15_64), [cospi_28_64] "r" (cospi_28_64)
119 );
120
121 __asm__ __volatile__ (
122 "lh %[load1], 18(%[input]) \n\t"
123 "lh %[load2], 46(%[input]) \n\t"
124 "lh %[load3], 50(%[input]) \n\t"
125 "lh %[load4], 14(%[input]) \n\t"
126
127 "mtlo %[const_2_power_13], $ac1 \n\t"
128 "mthi $zero, $ac1 \n\t"
129 "mtlo %[const_2_power_13], $ac3 \n\t"
130 "mthi $zero, $ac3 \n\t"
131
132 "madd $ac1, %[load1], %[cospi_23_64] \n\t"
133 "msub $ac1, %[load2], %[cospi_9_64] \n\t"
134 "extp %[temp0], $ac1, 31 \n\t"
135
136 "madd $ac3, %[load1], %[cospi_9_64] \n\t"
137 "madd $ac3, %[load2], %[cospi_23_64] \n\t"
138 "extp %[temp3], $ac3, 31 \n\t"
139
140 "mtlo %[const_2_power_13], $ac1 \n\t"
141 "mthi $zero, $ac1 \n\t"
142 "mtlo %[const_2_power_13], $ac2 \n\t"
143 "mthi $zero, $ac2 \n\t"
144
145 "madd $ac2, %[load3], %[cospi_7_64] \n\t"
146 "msub $ac2, %[load4], %[cospi_25_64] \n\t"
147 "extp %[temp1], $ac2, 31 \n\t"
148
149 "madd $ac1, %[load3], %[cospi_25_64] \n\t"
150 "madd $ac1, %[load4], %[cospi_7_64] \n\t"
151 "extp %[temp2], $ac1, 31 \n\t"
152
153 "mtlo %[const_2_power_13], $ac1 \n\t"
154 "mthi $zero, $ac1 \n\t"
155 "mtlo %[const_2_power_13], $ac3 \n\t"
156 "mthi $zero, $ac3 \n\t"
157
158 "sub %[load1], %[temp1], %[temp0] \n\t"
159 "sub %[load2], %[temp2], %[temp3] \n\t"
160
161 "msub $ac1, %[load1], %[cospi_28_64] \n\t"
162 "msub $ac1, %[load2], %[cospi_4_64] \n\t"
163 "msub $ac3, %[load1], %[cospi_4_64] \n\t"
164 "madd $ac3, %[load2], %[cospi_28_64] \n\t"
165
166 "extp %[step1_18], $ac1, 31 \n\t"
167 "extp %[step1_29], $ac3, 31 \n\t"
168 "add %[step1_19], %[temp0], %[temp1] \n\t"
169 "add %[step1_28], %[temp2], %[temp3] \n\t"
170
171 : [load1] "=&r" (load1), [load2] "=&r" (load2), [load3] "=&r" (load3),
172 [load4] "=&r" (load4), [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
173 [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
174 [step1_18] "=r" (step1_18), [step1_19] "=r" (step1_19),
175 [step1_28] "=r" (step1_28), [step1_29] "=r" (step1_29)
176 : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
177 [cospi_23_64] "r" (cospi_23_64), [cospi_9_64] "r" (cospi_9_64),
178 [cospi_4_64] "r" (cospi_4_64), [cospi_7_64] "r" (cospi_7_64),
179 [cospi_25_64] "r" (cospi_25_64), [cospi_28_64] "r" (cospi_28_64)
180 );
181
182 __asm__ __volatile__ (
183 "lh %[load1], 10(%[input]) \n\t"
184 "lh %[load2], 54(%[input]) \n\t"
185 "lh %[load3], 42(%[input]) \n\t"
186 "lh %[load4], 22(%[input]) \n\t"
187
188 "mtlo %[const_2_power_13], $ac1 \n\t"
189 "mthi $zero, $ac1 \n\t"
190 "mtlo %[const_2_power_13], $ac3 \n\t"
191 "mthi $zero, $ac3 \n\t"
192
193 "madd $ac1, %[load1], %[cospi_27_64] \n\t"
194 "msub $ac1, %[load2], %[cospi_5_64] \n\t"
195 "extp %[temp0], $ac1, 31 \n\t"
196
197 "madd $ac3, %[load1], %[cospi_5_64] \n\t"
198 "madd $ac3, %[load2], %[cospi_27_64] \n\t"
199 "extp %[temp3], $ac3, 31 \n\t"
200
201 "mtlo %[const_2_power_13], $ac1 \n\t"
202 "mthi $zero, $ac1 \n\t"
203 "mtlo %[const_2_power_13], $ac2 \n\t"
204 "mthi $zero, $ac2 \n\t"
205
206 "madd $ac2, %[load3], %[cospi_11_64] \n\t"
207 "msub $ac2, %[load4], %[cospi_21_64] \n\t"
208 "extp %[temp1], $ac2, 31 \n\t"
209
210 "madd $ac1, %[load3], %[cospi_21_64] \n\t"
211 "madd $ac1, %[load4], %[cospi_11_64] \n\t"
212 "extp %[temp2], $ac1, 31 \n\t"
213
214 "mtlo %[const_2_power_13], $ac1 \n\t"
215 "mthi $zero, $ac1 \n\t"
216 "mtlo %[const_2_power_13], $ac3 \n\t"
217 "mthi $zero, $ac3 \n\t"
218
219 "sub %[load1], %[temp0], %[temp1] \n\t"
220 "sub %[load2], %[temp3], %[temp2] \n\t"
221
222 "madd $ac1, %[load2], %[cospi_12_64] \n\t"
223 "msub $ac1, %[load1], %[cospi_20_64] \n\t"
224 "madd $ac3, %[load1], %[cospi_12_64] \n\t"
225 "madd $ac3, %[load2], %[cospi_20_64] \n\t"
226
227 "extp %[step1_21], $ac1, 31 \n\t"
228 "extp %[step1_26], $ac3, 31 \n\t"
229 "add %[step1_20], %[temp0], %[temp1] \n\t"
230 "add %[step1_27], %[temp2], %[temp3] \n\t"
231
232 : [load1] "=&r" (load1), [load2] "=&r" (load2), [load3] "=&r" (load3),
233 [load4] "=&r" (load4), [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
234 [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
235 [step1_20] "=r" (step1_20), [step1_21] "=r" (step1_21),
236 [step1_26] "=r" (step1_26), [step1_27] "=r" (step1_27)
237 : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
238 [cospi_27_64] "r" (cospi_27_64), [cospi_5_64] "r" (cospi_5_64),
239 [cospi_11_64] "r" (cospi_11_64), [cospi_21_64] "r" (cospi_21_64),
240 [cospi_12_64] "r" (cospi_12_64), [cospi_20_64] "r" (cospi_20_64)
241 );
242
243 __asm__ __volatile__ (
244 "lh %[load1], 26(%[input]) \n\t"
245 "lh %[load2], 38(%[input]) \n\t"
246 "lh %[load3], 58(%[input]) \n\t"
247 "lh %[load4], 6(%[input]) \n\t"
248
249 "mtlo %[const_2_power_13], $ac1 \n\t"
250 "mthi $zero, $ac1 \n\t"
251 "mtlo %[const_2_power_13], $ac3 \n\t"
252 "mthi $zero, $ac3 \n\t"
253
254 "madd $ac1, %[load1], %[cospi_19_64] \n\t"
255 "msub $ac1, %[load2], %[cospi_13_64] \n\t"
256 "extp %[temp0], $ac1, 31 \n\t"
257 "madd $ac3, %[load1], %[cospi_13_64] \n\t"
258 "madd $ac3, %[load2], %[cospi_19_64] \n\t"
259 "extp %[temp3], $ac3, 31 \n\t"
260
261 "mtlo %[const_2_power_13], $ac1 \n\t"
262 "mthi $zero, $ac1 \n\t"
263 "mtlo %[const_2_power_13], $ac2 \n\t"
264 "mthi $zero, $ac2 \n\t"
265
266 "madd $ac2, %[load3], %[cospi_3_64] \n\t"
267 "msub $ac2, %[load4], %[cospi_29_64] \n\t"
268 "extp %[temp1], $ac2, 31 \n\t"
269 "madd $ac1, %[load3], %[cospi_29_64] \n\t"
270 "madd $ac1, %[load4], %[cospi_3_64] \n\t"
271 "extp %[temp2], $ac1, 31 \n\t"
272
273 "mtlo %[const_2_power_13], $ac1 \n\t"
274 "mthi $zero, $ac1 \n\t"
275 "mtlo %[const_2_power_13], $ac3 \n\t"
276 "mthi $zero, $ac3 \n\t"
277
278 "sub %[load1], %[temp1], %[temp0] \n\t"
279 "sub %[load2], %[temp2], %[temp3] \n\t"
280 "msub $ac1, %[load1], %[cospi_12_64] \n\t"
281 "msub $ac1, %[load2], %[cospi_20_64] \n\t"
282 "msub $ac3, %[load1], %[cospi_20_64] \n\t"
283 "madd $ac3, %[load2], %[cospi_12_64] \n\t"
284 "extp %[step1_22], $ac1, 31 \n\t"
285 "extp %[step1_25], $ac3, 31 \n\t"
286 "add %[step1_23], %[temp0], %[temp1] \n\t"
287 "add %[step1_24], %[temp2], %[temp3] \n\t"
288
289 : [load1] "=&r" (load1), [load2] "=&r" (load2), [load3] "=&r" (load3),
290 [load4] "=&r" (load4), [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
291 [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
292 [step1_22] "=r" (step1_22), [step1_23] "=r" (step1_23),
293 [step1_24] "=r" (step1_24), [step1_25] "=r" (step1_25)
294 : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
295 [cospi_19_64] "r" (cospi_19_64), [cospi_13_64] "r" (cospi_13_64),
296 [cospi_3_64] "r" (cospi_3_64), [cospi_29_64] "r" (cospi_29_64),
297 [cospi_12_64] "r" (cospi_12_64), [cospi_20_64] "r" (cospi_20_64)
298 );
299
300 __asm__ __volatile__ (
301 "lh %[load1], 4(%[input]) \n\t"
302 "lh %[load2], 60(%[input]) \n\t"
303 "lh %[load3], 36(%[input]) \n\t"
304 "lh %[load4], 28(%[input]) \n\t"
305
306 "mtlo %[const_2_power_13], $ac1 \n\t"
307 "mthi $zero, $ac1 \n\t"
308 "mtlo %[const_2_power_13], $ac3 \n\t"
309 "mthi $zero, $ac3 \n\t"
310
311 "madd $ac1, %[load1], %[cospi_30_64] \n\t"
312 "msub $ac1, %[load2], %[cospi_2_64] \n\t"
313 "extp %[temp0], $ac1, 31 \n\t"
314 "madd $ac3, %[load1], %[cospi_2_64] \n\t"
315 "madd $ac3, %[load2], %[cospi_30_64] \n\t"
316 "extp %[temp3], $ac3, 31 \n\t"
317
318 "mtlo %[const_2_power_13], $ac1 \n\t"
319 "mthi $zero, $ac1 \n\t"
320 "mtlo %[const_2_power_13], $ac2 \n\t"
321 "mthi $zero, $ac2 \n\t"
322
323 "madd $ac2, %[load3], %[cospi_14_64] \n\t"
324 "msub $ac2, %[load4], %[cospi_18_64] \n\t"
325 "extp %[temp1], $ac2, 31 \n\t"
326 "madd $ac1, %[load3], %[cospi_18_64] \n\t"
327 "madd $ac1, %[load4], %[cospi_14_64] \n\t"
328 "extp %[temp2], $ac1, 31 \n\t"
329
330 "mtlo %[const_2_power_13], $ac1 \n\t"
331 "mthi $zero, $ac1 \n\t"
332 "mtlo %[const_2_power_13], $ac3 \n\t"
333 "mthi $zero, $ac3 \n\t"
334
335 "sub %[load1], %[temp0], %[temp1] \n\t"
336 "sub %[load2], %[temp3], %[temp2] \n\t"
337 "msub $ac1, %[load1], %[cospi_8_64] \n\t"
338 "madd $ac1, %[load2], %[cospi_24_64] \n\t"
339 "madd $ac3, %[load1], %[cospi_24_64] \n\t"
340 "madd $ac3, %[load2], %[cospi_8_64] \n\t"
341 "extp %[step2_9], $ac1, 31 \n\t"
342 "extp %[step2_14], $ac3, 31 \n\t"
343 "add %[step2_8], %[temp0], %[temp1] \n\t"
344 "add %[step2_15], %[temp2], %[temp3] \n\t"
345
346 : [load1] "=&r" (load1), [load2] "=&r" (load2), [load3] "=&r" (load3),
347 [load4] "=&r" (load4), [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
348 [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
349 [step2_8] "=r" (step2_8), [step2_9] "=r" (step2_9),
350 [step2_14] "=r" (step2_14), [step2_15] "=r" (step2_15)
351 : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
352 [cospi_30_64] "r" (cospi_30_64), [cospi_2_64] "r" (cospi_2_64),
353 [cospi_14_64] "r" (cospi_14_64), [cospi_18_64] "r" (cospi_18_64),
354 [cospi_8_64] "r" (cospi_8_64), [cospi_24_64] "r" (cospi_24_64)
355 );
356
357 __asm__ __volatile__ (
358 "lh %[load1], 20(%[input]) \n\t"
359 "lh %[load2], 44(%[input]) \n\t"
360 "lh %[load3], 52(%[input]) \n\t"
361 "lh %[load4], 12(%[input]) \n\t"
362
363 "mtlo %[const_2_power_13], $ac1 \n\t"
364 "mthi $zero, $ac1 \n\t"
365 "mtlo %[const_2_power_13], $ac3 \n\t"
366 "mthi $zero, $ac3 \n\t"
367
368 "madd $ac1, %[load1], %[cospi_22_64] \n\t"
369 "msub $ac1, %[load2], %[cospi_10_64] \n\t"
370 "extp %[temp0], $ac1, 31 \n\t"
371 "madd $ac3, %[load1], %[cospi_10_64] \n\t"
372 "madd $ac3, %[load2], %[cospi_22_64] \n\t"
373 "extp %[temp3], $ac3, 31 \n\t"
374
375 "mtlo %[const_2_power_13], $ac1 \n\t"
376 "mthi $zero, $ac1 \n\t"
377 "mtlo %[const_2_power_13], $ac2 \n\t"
378 "mthi $zero, $ac2 \n\t"
379
380 "madd $ac2, %[load3], %[cospi_6_64] \n\t"
381 "msub $ac2, %[load4], %[cospi_26_64] \n\t"
382 "extp %[temp1], $ac2, 31 \n\t"
383 "madd $ac1, %[load3], %[cospi_26_64] \n\t"
384 "madd $ac1, %[load4], %[cospi_6_64] \n\t"
385 "extp %[temp2], $ac1, 31 \n\t"
386
387 "mtlo %[const_2_power_13], $ac1 \n\t"
388 "mthi $zero, $ac1 \n\t"
389 "mtlo %[const_2_power_13], $ac3 \n\t"
390 "mthi $zero, $ac3 \n\t"
391
392 "sub %[load1], %[temp1], %[temp0] \n\t"
393 "sub %[load2], %[temp2], %[temp3] \n\t"
394 "msub $ac1, %[load1], %[cospi_24_64] \n\t"
395 "msub $ac1, %[load2], %[cospi_8_64] \n\t"
396 "madd $ac3, %[load2], %[cospi_24_64] \n\t"
397 "msub $ac3, %[load1], %[cospi_8_64] \n\t"
398 "extp %[step2_10], $ac1, 31 \n\t"
399 "extp %[step2_13], $ac3, 31 \n\t"
400 "add %[step2_11], %[temp0], %[temp1] \n\t"
401 "add %[step2_12], %[temp2], %[temp3] \n\t"
402
403 : [load1] "=&r" (load1), [load2] "=&r" (load2), [load3] "=&r" (load3),
404 [load4] "=&r" (load4), [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
405 [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
406 [step2_10] "=r" (step2_10), [step2_11] "=r" (step2_11),
407 [step2_12] "=r" (step2_12), [step2_13] "=r" (step2_13)
408 : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
409 [cospi_22_64] "r" (cospi_22_64), [cospi_10_64] "r" (cospi_10_64),
410 [cospi_6_64] "r" (cospi_6_64), [cospi_26_64] "r" (cospi_26_64),
411 [cospi_8_64] "r" (cospi_8_64), [cospi_24_64] "r" (cospi_24_64)
412 );
413
414 __asm__ __volatile__ (
415 "mtlo %[const_2_power_13], $ac0 \n\t"
416 "mthi $zero, $ac0 \n\t"
417 "sub %[temp0], %[step2_14], %[step2_13] \n\t"
418 "sub %[temp0], %[temp0], %[step2_9] \n\t"
419 "add %[temp0], %[temp0], %[step2_10] \n\t"
420 "madd $ac0, %[temp0], %[cospi_16_64] \n\t"
421 "mtlo %[const_2_power_13], $ac1 \n\t"
422 "mthi $zero, $ac1 \n\t"
423 "sub %[temp1], %[step2_14], %[step2_13] \n\t"
424 "add %[temp1], %[temp1], %[step2_9] \n\t"
425 "sub %[temp1], %[temp1], %[step2_10] \n\t"
426 "madd $ac1, %[temp1], %[cospi_16_64] \n\t"
427 "mtlo %[const_2_power_13], $ac2 \n\t"
428 "mthi $zero, $ac2 \n\t"
429 "sub %[temp0], %[step2_15], %[step2_12] \n\t"
430 "sub %[temp0], %[temp0], %[step2_8] \n\t"
431 "add %[temp0], %[temp0], %[step2_11] \n\t"
432 "madd $ac2, %[temp0], %[cospi_16_64] \n\t"
433 "mtlo %[const_2_power_13], $ac3 \n\t"
434 "mthi $zero, $ac3 \n\t"
435 "sub %[temp1], %[step2_15], %[step2_12] \n\t"
436 "add %[temp1], %[temp1], %[step2_8] \n\t"
437 "sub %[temp1], %[temp1], %[step2_11] \n\t"
438 "madd $ac3, %[temp1], %[cospi_16_64] \n\t"
439
440 "add %[step3_8], %[step2_8], %[step2_11] \n\t"
441 "add %[step3_9], %[step2_9], %[step2_10] \n\t"
442 "add %[step3_14], %[step2_13], %[step2_14] \n\t"
443 "add %[step3_15], %[step2_12], %[step2_15] \n\t"
444 "extp %[step3_10], $ac0, 31 \n\t"
445 "extp %[step3_13], $ac1, 31 \n\t"
446 "extp %[step3_11], $ac2, 31 \n\t"
447 "extp %[step3_12], $ac3, 31 \n\t"
448
449 : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
450 [step3_8] "=r" (step3_8), [step3_9] "=r" (step3_9),
451 [step3_10] "=r" (step3_10), [step3_11] "=r" (step3_11),
452 [step3_12] "=r" (step3_12), [step3_13] "=r" (step3_13),
453 [step3_14] "=r" (step3_14), [step3_15] "=r" (step3_15)
454 : [const_2_power_13] "r" (const_2_power_13), [step2_8] "r" (step2_8),
455 [step2_9] "r" (step2_9), [step2_10] "r" (step2_10),
456 [step2_11] "r" (step2_11), [step2_12] "r" (step2_12),
457 [step2_13] "r" (step2_13), [step2_14] "r" (step2_14),
458 [step2_15] "r" (step2_15), [cospi_16_64] "r" (cospi_16_64)
459 );
460
461 step2_18 = step1_17 - step1_18;
462 step2_29 = step1_30 - step1_29;
463
464 __asm__ __volatile__ (
465 "mtlo %[const_2_power_13], $ac0 \n\t"
466 "mthi $zero, $ac0 \n\t"
467 "msub $ac0, %[step2_18], %[cospi_8_64] \n\t"
468 "madd $ac0, %[step2_29], %[cospi_24_64] \n\t"
469 "extp %[step3_18], $ac0, 31 \n\t"
470
471 : [step3_18] "=r" (step3_18)
472 : [const_2_power_13] "r" (const_2_power_13),
473 [step2_18] "r" (step2_18), [step2_29] "r" (step2_29),
474 [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)
475 );
476
477 temp21 = step2_18 * cospi_24_64 + step2_29 * cospi_8_64;
478 step3_29 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
479
480 step2_19 = step1_16 - step1_19;
481 step2_28 = step1_31 - step1_28;
482
483 __asm__ __volatile__ (
484 "mtlo %[const_2_power_13], $ac0 \n\t"
485 "mthi $zero, $ac0 \n\t"
486 "msub $ac0, %[step2_19], %[cospi_8_64] \n\t"
487 "madd $ac0, %[step2_28], %[cospi_24_64] \n\t"
488 "extp %[step3_19], $ac0, 31 \n\t"
489
490 : [step3_19] "=r" (step3_19)
491 : [const_2_power_13] "r" (const_2_power_13),
492 [step2_19] "r" (step2_19), [step2_28] "r" (step2_28),
493 [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)
494 );
495
496 temp21 = step2_19 * cospi_24_64 + step2_28 * cospi_8_64;
497 step3_28 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
498
499 step3_16 = step1_16 + step1_19;
500 step3_17 = step1_17 + step1_18;
501 step3_30 = step1_29 + step1_30;
502 step3_31 = step1_28 + step1_31;
503
504 step2_20 = step1_23 - step1_20;
505 step2_27 = step1_24 - step1_27;
506
507 __asm__ __volatile__ (
508 "mtlo %[const_2_power_13], $ac0 \n\t"
509 "mthi $zero, $ac0 \n\t"
510 "msub $ac0, %[step2_20], %[cospi_24_64] \n\t"
511 "msub $ac0, %[step2_27], %[cospi_8_64] \n\t"
512 "extp %[step3_20], $ac0, 31 \n\t"
513
514 : [step3_20] "=r" (step3_20)
515 : [const_2_power_13] "r" (const_2_power_13),
516 [step2_20] "r" (step2_20), [step2_27] "r" (step2_27),
517 [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)
518 );
519
520 temp21 = -step2_20 * cospi_8_64 + step2_27 * cospi_24_64;
521 step3_27 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
522
523 step2_21 = step1_22 - step1_21;
524 step2_26 = step1_25 - step1_26;
525
526 __asm__ __volatile__ (
527 "mtlo %[const_2_power_13], $ac1 \n\t"
528 "mthi $zero, $ac1 \n\t"
529 "msub $ac1, %[step2_21], %[cospi_24_64] \n\t"
530 "msub $ac1, %[step2_26], %[cospi_8_64] \n\t"
531 "extp %[step3_21], $ac1, 31 \n\t"
532
533 : [step3_21] "=r" (step3_21)
534 : [const_2_power_13] "r" (const_2_power_13),
535 [step2_21] "r" (step2_21), [step2_26] "r" (step2_26),
536 [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)
537 );
538
539 temp21 = -step2_21 * cospi_8_64 + step2_26 * cospi_24_64;
540 step3_26 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
541
542 step3_22 = step1_21 + step1_22;
543 step3_23 = step1_20 + step1_23;
544 step3_24 = step1_24 + step1_27;
545 step3_25 = step1_25 + step1_26;
546
547 step2_16 = step3_16 + step3_23;
548 step2_17 = step3_17 + step3_22;
549 step2_18 = step3_18 + step3_21;
550 step2_19 = step3_19 + step3_20;
551 step2_20 = step3_19 - step3_20;
552 step2_21 = step3_18 - step3_21;
553 step2_22 = step3_17 - step3_22;
554 step2_23 = step3_16 - step3_23;
555
556 step2_24 = step3_31 - step3_24;
557 step2_25 = step3_30 - step3_25;
558 step2_26 = step3_29 - step3_26;
559 step2_27 = step3_28 - step3_27;
560 step2_28 = step3_28 + step3_27;
561 step2_29 = step3_29 + step3_26;
562 step2_30 = step3_30 + step3_25;
563 step2_31 = step3_31 + step3_24;
564
565 __asm__ __volatile__ (
566 "lh %[load1], 0(%[input]) \n\t"
567 "lh %[load2], 32(%[input]) \n\t"
568 "lh %[load3], 16(%[input]) \n\t"
569 "lh %[load4], 48(%[input]) \n\t"
570
571 "mtlo %[const_2_power_13], $ac1 \n\t"
572 "mthi $zero, $ac1 \n\t"
573 "mtlo %[const_2_power_13], $ac2 \n\t"
574 "mthi $zero, $ac2 \n\t"
575 "add %[result1], %[load1], %[load2] \n\t"
576 "sub %[result2], %[load1], %[load2] \n\t"
577 "madd $ac1, %[result1], %[cospi_16_64] \n\t"
578 "madd $ac2, %[result2], %[cospi_16_64] \n\t"
579 "extp %[temp0], $ac1, 31 \n\t"
580 "extp %[temp1], $ac2, 31 \n\t"
581
582 "mtlo %[const_2_power_13], $ac3 \n\t"
583 "mthi $zero, $ac3 \n\t"
584 "madd $ac3, %[load3], %[cospi_24_64] \n\t"
585 "msub $ac3, %[load4], %[cospi_8_64] \n\t"
586 "extp %[temp2], $ac3, 31 \n\t"
587 "mtlo %[const_2_power_13], $ac1 \n\t"
588 "mthi $zero, $ac1 \n\t"
589 "madd $ac1, %[load3], %[cospi_8_64] \n\t"
590 "madd $ac1, %[load4], %[cospi_24_64] \n\t"
591 "extp %[temp3], $ac1, 31 \n\t"
592 "add %[step1_0], %[temp0], %[temp3] \n\t"
593 "add %[step1_1], %[temp1], %[temp2] \n\t"
594 "sub %[step1_2], %[temp1], %[temp2] \n\t"
595 "sub %[step1_3], %[temp0], %[temp3] \n\t"
596
597 : [load1] "=&r" (load1), [load2] "=&r" (load2),
598 [load3] "=&r" (load3), [load4] "=&r" (load4),
599 [result1] "=&r" (result1), [result2] "=&r" (result2),
600 [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
601 [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
602 [step1_0] "=r" (step1_0), [step1_1] "=r" (step1_1),
603 [step1_2] "=r" (step1_2), [step1_3] "=r" (step1_3)
604 : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
605 [cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64),
606 [cospi_16_64] "r" (cospi_16_64)
607 );
608
609 __asm__ __volatile__ (
610 "lh %[load1], 8(%[input]) \n\t"
611 "lh %[load2], 56(%[input]) \n\t"
612 "lh %[load3], 40(%[input]) \n\t"
613 "lh %[load4], 24(%[input]) \n\t"
614
615 "mtlo %[const_2_power_13], $ac1 \n\t"
616 "mthi $zero, $ac1 \n\t"
617 "mtlo %[const_2_power_13], $ac3 \n\t"
618 "mthi $zero, $ac3 \n\t"
619
620 "madd $ac1, %[load1], %[cospi_28_64] \n\t"
621 "msub $ac1, %[load2], %[cospi_4_64] \n\t"
622 "extp %[temp0], $ac1, 31 \n\t"
623 "madd $ac3, %[load1], %[cospi_4_64] \n\t"
624 "madd $ac3, %[load2], %[cospi_28_64] \n\t"
625 "extp %[temp3], $ac3, 31 \n\t"
626
627 "mtlo %[const_2_power_13], $ac1 \n\t"
628 "mthi $zero, $ac1 \n\t"
629 "mtlo %[const_2_power_13], $ac2 \n\t"
630 "mthi $zero, $ac2 \n\t"
631
632 "madd $ac2, %[load3], %[cospi_12_64] \n\t"
633 "msub $ac2, %[load4], %[cospi_20_64] \n\t"
634 "extp %[temp1], $ac2, 31 \n\t"
635 "madd $ac1, %[load3], %[cospi_20_64] \n\t"
636 "madd $ac1, %[load4], %[cospi_12_64] \n\t"
637 "extp %[temp2], $ac1, 31 \n\t"
638
639 "mtlo %[const_2_power_13], $ac1 \n\t"
640 "mthi $zero, $ac1 \n\t"
641 "mtlo %[const_2_power_13], $ac3 \n\t"
642 "mthi $zero, $ac3 \n\t"
643
644 "sub %[load1], %[temp3], %[temp2] \n\t"
645 "sub %[load1], %[load1], %[temp0] \n\t"
646 "add %[load1], %[load1], %[temp1] \n\t"
647 "sub %[load2], %[temp0], %[temp1] \n\t"
648 "sub %[load2], %[load2], %[temp2] \n\t"
649 "add %[load2], %[load2], %[temp3] \n\t"
650 "madd $ac1, %[load1], %[cospi_16_64] \n\t"
651 "madd $ac3, %[load2], %[cospi_16_64] \n\t"
652
653 "extp %[step1_5], $ac1, 31 \n\t"
654 "extp %[step1_6], $ac3, 31 \n\t"
655 "add %[step1_4], %[temp0], %[temp1] \n\t"
656 "add %[step1_7], %[temp3], %[temp2] \n\t"
657
658 : [load1] "=&r" (load1), [load2] "=&r" (load2),
659 [load3] "=&r" (load3), [load4] "=&r" (load4),
660 [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
661 [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
662 [step1_4] "=r" (step1_4), [step1_5] "=r" (step1_5),
663 [step1_6] "=r" (step1_6), [step1_7] "=r" (step1_7)
664 : [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),
665 [cospi_20_64] "r" (cospi_20_64), [cospi_12_64] "r" (cospi_12_64),
666 [cospi_4_64] "r" (cospi_4_64), [cospi_28_64] "r" (cospi_28_64),
667 [cospi_16_64] "r" (cospi_16_64)
668 );
669
670 step2_0 = step1_0 + step1_7;
671 step2_1 = step1_1 + step1_6;
672 step2_2 = step1_2 + step1_5;
673 step2_3 = step1_3 + step1_4;
674 step2_4 = step1_3 - step1_4;
675 step2_5 = step1_2 - step1_5;
676 step2_6 = step1_1 - step1_6;
677 step2_7 = step1_0 - step1_7;
678
679 // stage 7
680 step1_0 = step2_0 + step3_15;
681 step1_1 = step2_1 + step3_14;
682 step1_2 = step2_2 + step3_13;
683 step1_3 = step2_3 + step3_12;
684 step1_4 = step2_4 + step3_11;
685 step1_5 = step2_5 + step3_10;
686 step1_6 = step2_6 + step3_9;
687 step1_7 = step2_7 + step3_8;
688 step1_8 = step2_7 - step3_8;
689 step1_9 = step2_6 - step3_9;
690 step1_10 = step2_5 - step3_10;
691 step1_11 = step2_4 - step3_11;
692 step1_12 = step2_3 - step3_12;
693 step1_13 = step2_2 - step3_13;
694 step1_14 = step2_1 - step3_14;
695 step1_15 = step2_0 - step3_15;
696
697 __asm__ __volatile__ (
698 "sub %[temp0], %[step2_27], %[step2_20] \n\t"
699 "mtlo %[const_2_power_13], $ac0 \n\t"
700 "mthi $zero, $ac0 \n\t"
701 "madd $ac0, %[temp0], %[cospi_16_64] \n\t"
702 "extp %[step1_20], $ac0, 31 \n\t"
703
704 : [temp0] "=&r" (temp0), [step1_20] "=r" (step1_20)
705 : [const_2_power_13] "r" (const_2_power_13), [step2_20] "r" (step2_20),
706 [step2_27] "r" (step2_27), [cospi_16_64] "r" (cospi_16_64)
707 );
708
709 temp21 = (step2_20 + step2_27) * cospi_16_64;
710 step1_27 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
711
712 __asm__ __volatile__ (
713 "sub %[temp0], %[step2_26], %[step2_21] \n\t"
714 "mtlo %[const_2_power_13], $ac0 \n\t"
715 "mthi $zero, $ac0 \n\t"
716 "madd $ac0, %[temp0], %[cospi_16_64] \n\t"
717 "extp %[step1_21], $ac0, 31 \n\t"
718
719 : [temp0] "=&r" (temp0), [step1_21] "=r" (step1_21)
720 : [const_2_power_13] "r" (const_2_power_13), [step2_26] "r" (step2_26),
721 [step2_21] "r" (step2_21), [cospi_16_64] "r" (cospi_16_64)
722 );
723
724 temp21 = (step2_21 + step2_26) * cospi_16_64;
725 step1_26 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
726
727 __asm__ __volatile__ (
728 "sub %[temp0], %[step2_25], %[step2_22] \n\t"
729 "mtlo %[const_2_power_13], $ac0 \n\t"
730 "mthi $zero, $ac0 \n\t"
731 "madd $ac0, %[temp0], %[cospi_16_64] \n\t"
732 "extp %[step1_22], $ac0, 31 \n\t"
733
734 : [temp0] "=&r" (temp0), [step1_22] "=r" (step1_22)
735 : [const_2_power_13] "r" (const_2_power_13), [step2_25] "r" (step2_25),
736 [step2_22] "r" (step2_22), [cospi_16_64] "r" (cospi_16_64)
737 );
738
739 temp21 = (step2_22 + step2_25) * cospi_16_64;
740 step1_25 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
741
742 __asm__ __volatile__ (
743 "sub %[temp0], %[step2_24], %[step2_23] \n\t"
744 "mtlo %[const_2_power_13], $ac0 \n\t"
745 "mthi $zero, $ac0 \n\t"
746 "madd $ac0, %[temp0], %[cospi_16_64] \n\t"
747 "extp %[step1_23], $ac0, 31 \n\t"
748
749 : [temp0] "=&r" (temp0), [step1_23] "=r" (step1_23)
750 : [const_2_power_13] "r" (const_2_power_13), [step2_24] "r" (step2_24),
751 [step2_23] "r" (step2_23), [cospi_16_64] "r" (cospi_16_64)
752 );
753
754 temp21 = (step2_23 + step2_24) * cospi_16_64;
755 step1_24 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
756
757 __asm__ __volatile__ (
758 "lbu %[temp2], 0(%[dest_pix]) \n\t"
759 "add %[temp0], %[step1_0], %[step2_31] \n\t"
760 "addi %[temp0], %[temp0], 32 \n\t"
761 "sra %[temp0], %[temp0], 6 \n\t"
762 "add %[temp2], %[temp2], %[temp0] \n\t"
763 "lbux %[temp0], %[temp2](%[cm]) \n\t"
764 "add %[temp1], %[step1_1], %[step2_30] \n\t"
765 "sb %[temp0], 0(%[dest_pix]) \n\t"
766 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
767 "lbu %[temp3], 0(%[dest_pix]) \n\t"
768 "addi %[temp1], %[temp1], 32 \n\t"
769 "sra %[temp1], %[temp1], 6 \n\t"
770 "add %[temp3], %[temp3], %[temp1] \n\t"
771 "lbux %[temp1], %[temp3](%[cm]) \n\t"
772 "sb %[temp1], 0(%[dest_pix]) \n\t"
773 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
774
775 "lbu %[temp2], 0(%[dest_pix]) \n\t"
776 "add %[temp0], %[step1_2], %[step2_29] \n\t"
777 "addi %[temp0], %[temp0], 32 \n\t"
778 "sra %[temp0], %[temp0], 6 \n\t"
779 "add %[temp2], %[temp2], %[temp0] \n\t"
780 "lbux %[temp0], %[temp2](%[cm]) \n\t"
781 "add %[temp1], %[step1_3], %[step2_28] \n\t"
782 "sb %[temp0], 0(%[dest_pix]) \n\t"
783 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
784 "lbu %[temp3], 0(%[dest_pix]) \n\t"
785 "addi %[temp1], %[temp1], 32 \n\t"
786 "sra %[temp1], %[temp1], 6 \n\t"
787 "add %[temp3], %[temp3], %[temp1] \n\t"
788 "lbux %[temp1], %[temp3](%[cm]) \n\t"
789 "sb %[temp1], 0(%[dest_pix]) \n\t"
790 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
791
792 : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
793 [temp3] "=&r" (temp3), [dest_pix] "+r" (dest_pix)
794 : [cm] "r" (cm), [dest_stride] "r" (dest_stride),
795 [step1_0] "r" (step1_0), [step1_1] "r" (step1_1),
796 [step1_2] "r" (step1_2), [step1_3] "r" (step1_3),
797 [step2_28] "r" (step2_28), [step2_29] "r" (step2_29),
798 [step2_30] "r" (step2_30), [step2_31] "r" (step2_31)
799 );
800
801 step3_12 = ROUND_POWER_OF_TWO((step1_3 - step2_28), 6);
802 step3_13 = ROUND_POWER_OF_TWO((step1_2 - step2_29), 6);
803 step3_14 = ROUND_POWER_OF_TWO((step1_1 - step2_30), 6);
804 step3_15 = ROUND_POWER_OF_TWO((step1_0 - step2_31), 6);
805
806 __asm__ __volatile__ (
807 "lbu %[temp2], 0(%[dest_pix1]) \n\t"
808 "add %[temp2], %[temp2], %[step3_15] \n\t"
809 "lbux %[temp0], %[temp2](%[cm]) \n\t"
810 "sb %[temp0], 0(%[dest_pix1]) \n\t"
811 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
812 "lbu %[temp3], 0(%[dest_pix1]) \n\t"
813 "add %[temp3], %[temp3], %[step3_14] \n\t"
814 "lbux %[temp1], %[temp3](%[cm]) \n\t"
815 "sb %[temp1], 0(%[dest_pix1]) \n\t"
816 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
817
818 "lbu %[temp2], 0(%[dest_pix1]) \n\t"
819 "add %[temp2], %[temp2], %[step3_13] \n\t"
820 "lbux %[temp0], %[temp2](%[cm]) \n\t"
821 "sb %[temp0], 0(%[dest_pix1]) \n\t"
822 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
823 "lbu %[temp3], 0(%[dest_pix1]) \n\t"
824 "add %[temp3], %[temp3], %[step3_12] \n\t"
825 "lbux %[temp1], %[temp3](%[cm]) \n\t"
826 "sb %[temp1], 0(%[dest_pix1]) \n\t"
827 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
828
829 : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
830 [temp3] "=&r" (temp3), [dest_pix1] "+r" (dest_pix1)
831 : [cm] "r" (cm), [dest_stride] "r" (dest_stride),
832 [step3_12] "r" (step3_12), [step3_13] "r" (step3_13),
833 [step3_14] "r" (step3_14), [step3_15] "r" (step3_15)
834 );
835
836 __asm__ __volatile__ (
837 "lbu %[temp2], 0(%[dest_pix]) \n\t"
838 "add %[temp0], %[step1_4], %[step1_27] \n\t"
839 "addi %[temp0], %[temp0], 32 \n\t"
840 "sra %[temp0], %[temp0], 6 \n\t"
841 "add %[temp2], %[temp2], %[temp0] \n\t"
842 "lbux %[temp0], %[temp2](%[cm]) \n\t"
843 "add %[temp1], %[step1_5], %[step1_26] \n\t"
844 "sb %[temp0], 0(%[dest_pix]) \n\t"
845 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
846 "lbu %[temp3], 0(%[dest_pix]) \n\t"
847 "addi %[temp1], %[temp1], 32 \n\t"
848 "sra %[temp1], %[temp1], 6 \n\t"
849 "add %[temp3], %[temp3], %[temp1] \n\t"
850 "lbux %[temp1], %[temp3](%[cm]) \n\t"
851 "sb %[temp1], 0(%[dest_pix]) \n\t"
852 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
853
854 "lbu %[temp2], 0(%[dest_pix]) \n\t"
855 "add %[temp0], %[step1_6], %[step1_25] \n\t"
856 "addi %[temp0], %[temp0], 32 \n\t"
857 "sra %[temp0], %[temp0], 6 \n\t"
858 "add %[temp2], %[temp2], %[temp0] \n\t"
859 "lbux %[temp0], %[temp2](%[cm]) \n\t"
860 "add %[temp1], %[step1_7], %[step1_24] \n\t"
861 "sb %[temp0], 0(%[dest_pix]) \n\t"
862 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
863 "lbu %[temp3], 0(%[dest_pix]) \n\t"
864 "addi %[temp1], %[temp1], 32 \n\t"
865 "sra %[temp1], %[temp1], 6 \n\t"
866 "add %[temp3], %[temp3], %[temp1] \n\t"
867 "lbux %[temp1], %[temp3](%[cm]) \n\t"
868 "sb %[temp1], 0(%[dest_pix]) \n\t"
869 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
870
871 : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
872 [temp3] "=&r" (temp3), [dest_pix] "+r" (dest_pix)
873 : [cm] "r" (cm), [dest_stride] "r" (dest_stride),
874 [step1_4] "r" (step1_4), [step1_5] "r" (step1_5),
875 [step1_6] "r" (step1_6), [step1_7] "r" (step1_7),
876 [step1_24] "r" (step1_24), [step1_25] "r" (step1_25),
877 [step1_26] "r" (step1_26), [step1_27] "r" (step1_27)
878 );
879
880 step3_12 = ROUND_POWER_OF_TWO((step1_7 - step1_24), 6);
881 step3_13 = ROUND_POWER_OF_TWO((step1_6 - step1_25), 6);
882 step3_14 = ROUND_POWER_OF_TWO((step1_5 - step1_26), 6);
883 step3_15 = ROUND_POWER_OF_TWO((step1_4 - step1_27), 6);
884
885 __asm__ __volatile__ (
886 "lbu %[temp2], 0(%[dest_pix1]) \n\t"
887 "add %[temp2], %[temp2], %[step3_15] \n\t"
888 "lbux %[temp0], %[temp2](%[cm]) \n\t"
889 "sb %[temp0], 0(%[dest_pix1]) \n\t"
890 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
891 "lbu %[temp3], 0(%[dest_pix1]) \n\t"
892 "add %[temp3], %[temp3], %[step3_14] \n\t"
893 "lbux %[temp1], %[temp3](%[cm]) \n\t"
894 "sb %[temp1], 0(%[dest_pix1]) \n\t"
895 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
896
897 "lbu %[temp2], 0(%[dest_pix1]) \n\t"
898 "add %[temp2], %[temp2], %[step3_13] \n\t"
899 "lbux %[temp0], %[temp2](%[cm]) \n\t"
900 "sb %[temp0], 0(%[dest_pix1]) \n\t"
901 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
902 "lbu %[temp3], 0(%[dest_pix1]) \n\t"
903 "add %[temp3], %[temp3], %[step3_12] \n\t"
904 "lbux %[temp1], %[temp3](%[cm]) \n\t"
905 "sb %[temp1], 0(%[dest_pix1]) \n\t"
906 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
907
908 : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
909 [temp3] "=&r" (temp3), [dest_pix1] "+r" (dest_pix1)
910 : [cm] "r" (cm), [dest_stride] "r" (dest_stride),
911 [step3_12] "r" (step3_12), [step3_13] "r" (step3_13),
912 [step3_14] "r" (step3_14), [step3_15] "r" (step3_15)
913 );
914
915 __asm__ __volatile__ (
916 "lbu %[temp2], 0(%[dest_pix]) \n\t"
917 "add %[temp0], %[step1_8], %[step1_23] \n\t"
918 "addi %[temp0], %[temp0], 32 \n\t"
919 "sra %[temp0], %[temp0], 6 \n\t"
920 "add %[temp2], %[temp2], %[temp0] \n\t"
921 "lbux %[temp0], %[temp2](%[cm]) \n\t"
922 "add %[temp1], %[step1_9], %[step1_22] \n\t"
923 "sb %[temp0], 0(%[dest_pix]) \n\t"
924 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
925 "lbu %[temp3], 0(%[dest_pix]) \n\t"
926 "addi %[temp1], %[temp1], 32 \n\t"
927 "sra %[temp1], %[temp1], 6 \n\t"
928 "add %[temp3], %[temp3], %[temp1] \n\t"
929 "lbux %[temp1], %[temp3](%[cm]) \n\t"
930 "sb %[temp1], 0(%[dest_pix]) \n\t"
931 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
932
933 "lbu %[temp2], 0(%[dest_pix]) \n\t"
934 "add %[temp0], %[step1_10], %[step1_21] \n\t"
935 "addi %[temp0], %[temp0], 32 \n\t"
936 "sra %[temp0], %[temp0], 6 \n\t"
937 "add %[temp2], %[temp2], %[temp0] \n\t"
938 "lbux %[temp0], %[temp2](%[cm]) \n\t"
939 "add %[temp1], %[step1_11], %[step1_20] \n\t"
940 "sb %[temp0], 0(%[dest_pix]) \n\t"
941 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
942 "lbu %[temp3], 0(%[dest_pix]) \n\t"
943 "addi %[temp1], %[temp1], 32 \n\t"
944 "sra %[temp1], %[temp1], 6 \n\t"
945 "add %[temp3], %[temp3], %[temp1] \n\t"
946 "lbux %[temp1], %[temp3](%[cm]) \n\t"
947 "sb %[temp1], 0(%[dest_pix]) \n\t"
948 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
949
950 : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
951 [temp3] "=&r" (temp3), [dest_pix] "+r" (dest_pix)
952 : [cm] "r" (cm), [dest_stride] "r" (dest_stride),
953 [step1_8] "r" (step1_8), [step1_9] "r" (step1_9),
954 [step1_10] "r" (step1_10), [step1_11] "r" (step1_11),
955 [step1_20] "r" (step1_20), [step1_21] "r" (step1_21),
956 [step1_22] "r" (step1_22), [step1_23] "r" (step1_23)
957 );
958
959 step3_12 = ROUND_POWER_OF_TWO((step1_11 - step1_20), 6);
960 step3_13 = ROUND_POWER_OF_TWO((step1_10 - step1_21), 6);
961 step3_14 = ROUND_POWER_OF_TWO((step1_9 - step1_22), 6);
962 step3_15 = ROUND_POWER_OF_TWO((step1_8 - step1_23), 6);
963
964 __asm__ __volatile__ (
965 "lbu %[temp2], 0(%[dest_pix1]) \n\t"
966 "add %[temp2], %[temp2], %[step3_15] \n\t"
967 "lbux %[temp0], %[temp2](%[cm]) \n\t"
968 "sb %[temp0], 0(%[dest_pix1]) \n\t"
969 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
970 "lbu %[temp3], 0(%[dest_pix1]) \n\t"
971 "add %[temp3], %[temp3], %[step3_14] \n\t"
972 "lbux %[temp1], %[temp3](%[cm]) \n\t"
973 "sb %[temp1], 0(%[dest_pix1]) \n\t"
974 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
975
976 "lbu %[temp2], 0(%[dest_pix1]) \n\t"
977 "add %[temp2], %[temp2], %[step3_13] \n\t"
978 "lbux %[temp0], %[temp2](%[cm]) \n\t"
979 "sb %[temp0], 0(%[dest_pix1]) \n\t"
980 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
981 "lbu %[temp3], 0(%[dest_pix1]) \n\t"
982 "add %[temp3], %[temp3], %[step3_12] \n\t"
983 "lbux %[temp1], %[temp3](%[cm]) \n\t"
984 "sb %[temp1], 0(%[dest_pix1]) \n\t"
985 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
986
987 : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
988 [temp3] "=&r" (temp3), [dest_pix1] "+r" (dest_pix1)
989 : [cm] "r" (cm), [dest_stride] "r" (dest_stride),
990 [step3_12] "r" (step3_12), [step3_13] "r" (step3_13),
991 [step3_14] "r" (step3_14), [step3_15] "r" (step3_15)
992 );
993
994 __asm__ __volatile__ (
995 "lbu %[temp2], 0(%[dest_pix]) \n\t"
996 "add %[temp0], %[step1_12], %[step2_19] \n\t"
997 "addi %[temp0], %[temp0], 32 \n\t"
998 "sra %[temp0], %[temp0], 6 \n\t"
999 "add %[temp2], %[temp2], %[temp0] \n\t"
1000 "lbux %[temp0], %[temp2](%[cm]) \n\t"
1001 "add %[temp1], %[step1_13], %[step2_18] \n\t"
1002 "sb %[temp0], 0(%[dest_pix]) \n\t"
1003 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
1004 "lbu %[temp3], 0(%[dest_pix]) \n\t"
1005 "addi %[temp1], %[temp1], 32 \n\t"
1006 "sra %[temp1], %[temp1], 6 \n\t"
1007 "add %[temp3], %[temp3], %[temp1] \n\t"
1008 "lbux %[temp1], %[temp3](%[cm]) \n\t"
1009 "sb %[temp1], 0(%[dest_pix]) \n\t"
1010 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
1011
1012 "lbu %[temp2], 0(%[dest_pix]) \n\t"
1013 "add %[temp0], %[step1_14], %[step2_17] \n\t"
1014 "addi %[temp0], %[temp0], 32 \n\t"
1015 "sra %[temp0], %[temp0], 6 \n\t"
1016 "add %[temp2], %[temp2], %[temp0] \n\t"
1017 "lbux %[temp0], %[temp2](%[cm]) \n\t"
1018 "add %[temp1], %[step1_15], %[step2_16] \n\t"
1019 "sb %[temp0], 0(%[dest_pix]) \n\t"
1020 "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t"
1021 "lbu %[temp3], 0(%[dest_pix]) \n\t"
1022 "addi %[temp1], %[temp1], 32 \n\t"
1023 "sra %[temp1], %[temp1], 6 \n\t"
1024 "add %[temp3], %[temp3], %[temp1] \n\t"
1025 "lbux %[temp1], %[temp3](%[cm]) \n\t"
1026 "sb %[temp1], 0(%[dest_pix]) \n\t"
1027
1028 : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
1029 [temp3] "=&r" (temp3), [dest_pix] "+r" (dest_pix)
1030 : [cm] "r" (cm), [dest_stride] "r" (dest_stride),
1031 [step1_12] "r" (step1_12), [step1_13] "r" (step1_13),
1032 [step1_14] "r" (step1_14), [step1_15] "r" (step1_15),
1033 [step2_16] "r" (step2_16), [step2_17] "r" (step2_17),
1034 [step2_18] "r" (step2_18), [step2_19] "r" (step2_19)
1035 );
1036
1037 step3_12 = ROUND_POWER_OF_TWO((step1_15 - step2_16), 6);
1038 step3_13 = ROUND_POWER_OF_TWO((step1_14 - step2_17), 6);
1039 step3_14 = ROUND_POWER_OF_TWO((step1_13 - step2_18), 6);
1040 step3_15 = ROUND_POWER_OF_TWO((step1_12 - step2_19), 6);
1041
1042 __asm__ __volatile__ (
1043 "lbu %[temp2], 0(%[dest_pix1]) \n\t"
1044 "add %[temp2], %[temp2], %[step3_15] \n\t"
1045 "lbux %[temp0], %[temp2](%[cm]) \n\t"
1046 "sb %[temp0], 0(%[dest_pix1]) \n\t"
1047 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
1048 "lbu %[temp3], 0(%[dest_pix1]) \n\t"
1049 "add %[temp3], %[temp3], %[step3_14] \n\t"
1050 "lbux %[temp1], %[temp3](%[cm]) \n\t"
1051 "sb %[temp1], 0(%[dest_pix1]) \n\t"
1052 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
1053
1054 "lbu %[temp2], 0(%[dest_pix1]) \n\t"
1055 "add %[temp2], %[temp2], %[step3_13] \n\t"
1056 "lbux %[temp0], %[temp2](%[cm]) \n\t"
1057 "sb %[temp0], 0(%[dest_pix1]) \n\t"
1058 "subu %[dest_pix1], %[dest_pix1], %[dest_stride] \n\t"
1059 "lbu %[temp3], 0(%[dest_pix1]) \n\t"
1060 "add %[temp3], %[temp3], %[step3_12] \n\t"
1061 "lbux %[temp1], %[temp3](%[cm]) \n\t"
1062 "sb %[temp1], 0(%[dest_pix1]) \n\t"
1063
1064 : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
1065 [temp3] "=&r" (temp3), [dest_pix1] "+r" (dest_pix1)
1066 : [cm] "r" (cm), [dest_stride] "r" (dest_stride),
1067 [step3_12] "r" (step3_12), [step3_13] "r" (step3_13),
1068 [step3_14] "r" (step3_14), [step3_15] "r" (step3_15)
1069 );
1070
1071 input += 32;
1072 }
1073 }
1074 #endif // #if HAVE_DSPR2
OLDNEW
« no previous file with comments | « source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c ('k') | source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698