OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
(...skipping 25 matching lines...) Expand all Loading... |
36 cospi_24_64 EQU 6270 | 36 cospi_24_64 EQU 6270 |
37 cospi_25_64 EQU 5520 | 37 cospi_25_64 EQU 5520 |
38 cospi_26_64 EQU 4756 | 38 cospi_26_64 EQU 4756 |
39 cospi_27_64 EQU 3981 | 39 cospi_27_64 EQU 3981 |
40 cospi_28_64 EQU 3196 | 40 cospi_28_64 EQU 3196 |
41 cospi_29_64 EQU 2404 | 41 cospi_29_64 EQU 2404 |
42 cospi_30_64 EQU 1606 | 42 cospi_30_64 EQU 1606 |
43 cospi_31_64 EQU 804 | 43 cospi_31_64 EQU 804 |
44 | 44 |
45 | 45 |
46 EXPORT |vp9_idct32x32_1024_add_neon| | 46 EXPORT |vpx_idct32x32_1024_add_neon| |
47 ARM | 47 ARM |
48 REQUIRE8 | 48 REQUIRE8 |
49 PRESERVE8 | 49 PRESERVE8 |
50 | 50 |
51 AREA ||.text||, CODE, READONLY, ALIGN=2 | 51 AREA ||.text||, CODE, READONLY, ALIGN=2 |
52 | 52 |
53 AREA Block, CODE, READONLY | 53 AREA Block, CODE, READONLY |
54 | 54 |
55 ; -------------------------------------------------------------------------- | 55 ; -------------------------------------------------------------------------- |
56 ; Load from transposed_buffer | 56 ; Load from transposed_buffer |
(...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
281 MEND | 281 MEND |
282 ; -------------------------------------------------------------------------- | 282 ; -------------------------------------------------------------------------- |
283 ; Touches q8-q12, q15 (q13-q14 are preserved) | 283 ; Touches q8-q12, q15 (q13-q14 are preserved) |
284 ; valid output registers are anything but q8-q11 | 284 ; valid output registers are anything but q8-q11 |
285 MACRO | 285 MACRO |
286 DO_BUTTERFLY_STD $first_constant, $second_constant, $reg1, $reg2, $reg3, $re
g4 | 286 DO_BUTTERFLY_STD $first_constant, $second_constant, $reg1, $reg2, $reg3, $re
g4 |
287 DO_BUTTERFLY d28, d29, d26, d27, $first_constant, $second_constant, $reg1, $
reg2, $reg3, $reg4 | 287 DO_BUTTERFLY d28, d29, d26, d27, $first_constant, $second_constant, $reg1, $
reg2, $reg3, $reg4 |
288 MEND | 288 MEND |
289 ; -------------------------------------------------------------------------- | 289 ; -------------------------------------------------------------------------- |
290 | 290 |
291 ;void vp9_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride
); | 291 ;void vpx_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride
); |
292 ; | 292 ; |
293 ; r0 int16_t *input, | 293 ; r0 int16_t *input, |
294 ; r1 uint8_t *dest, | 294 ; r1 uint8_t *dest, |
295 ; r2 int dest_stride) | 295 ; r2 int dest_stride) |
296 ; loop counters | 296 ; loop counters |
297 ; r4 bands loop counter | 297 ; r4 bands loop counter |
298 ; r5 pass loop counter | 298 ; r5 pass loop counter |
299 ; r8 transpose loop counter | 299 ; r8 transpose loop counter |
300 ; combine-add pointers | 300 ; combine-add pointers |
301 ; r6 dest + 31 * dest_stride, descending (30, 29, 28, ...) | 301 ; r6 dest + 31 * dest_stride, descending (30, 29, 28, ...) |
302 ; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...) | 302 ; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...) |
303 ; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...) | 303 ; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...) |
304 ; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...) | 304 ; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...) |
305 | 305 |
306 |vp9_idct32x32_1024_add_neon| PROC | 306 |vpx_idct32x32_1024_add_neon| PROC |
307 ; This function does one pass of idct32x32 transform. | 307 ; This function does one pass of idct32x32 transform. |
308 ; | 308 ; |
309 ; This is done by transposing the input and then doing a 1d transform on | 309 ; This is done by transposing the input and then doing a 1d transform on |
310 ; columns. In the first pass, the transposed columns are the original | 310 ; columns. In the first pass, the transposed columns are the original |
311 ; rows. In the second pass, after the transposition, the columns are the | 311 ; rows. In the second pass, after the transposition, the columns are the |
312 ; original columns. | 312 ; original columns. |
313 ; The 1d transform is done by looping over bands of eight columns (the | 313 ; The 1d transform is done by looping over bands of eight columns (the |
314 ; idct32_bands loop). For each band, the transform input transposition | 314 ; idct32_bands loop). For each band, the transform input transposition |
315 ; is done on demand, one band of four 8x8 matrices at a time. The four | 315 ; is done on demand, one band of four 8x8 matrices at a time. The four |
316 ; matrices are transposed by pairs (the idct32_transpose_pair loop). | 316 ; matrices are transposed by pairs (the idct32_transpose_pair loop). |
(...skipping 971 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1288 | 1288 |
1289 ; bands loop processing | 1289 ; bands loop processing |
1290 subs r4, r4, #1 | 1290 subs r4, r4, #1 |
1291 bne idct32_bands_loop | 1291 bne idct32_bands_loop |
1292 | 1292 |
1293 ; stack operation | 1293 ; stack operation |
1294 add sp, sp, #512+2048+2048 | 1294 add sp, sp, #512+2048+2048 |
1295 vpop {d8-d15} | 1295 vpop {d8-d15} |
1296 pop {r4-r11} | 1296 pop {r4-r11} |
1297 bx lr | 1297 bx lr |
1298 ENDP ; |vp9_idct32x32_1024_add_neon| | 1298 ENDP ; |vpx_idct32x32_1024_add_neon| |
1299 END | 1299 END |
OLD | NEW |