| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| (...skipping 25 matching lines...) Expand all Loading... |
| 36 cospi_24_64 EQU 6270 | 36 cospi_24_64 EQU 6270 |
| 37 cospi_25_64 EQU 5520 | 37 cospi_25_64 EQU 5520 |
| 38 cospi_26_64 EQU 4756 | 38 cospi_26_64 EQU 4756 |
| 39 cospi_27_64 EQU 3981 | 39 cospi_27_64 EQU 3981 |
| 40 cospi_28_64 EQU 3196 | 40 cospi_28_64 EQU 3196 |
| 41 cospi_29_64 EQU 2404 | 41 cospi_29_64 EQU 2404 |
| 42 cospi_30_64 EQU 1606 | 42 cospi_30_64 EQU 1606 |
| 43 cospi_31_64 EQU 804 | 43 cospi_31_64 EQU 804 |
| 44 | 44 |
| 45 | 45 |
| 46 EXPORT |vp9_idct32x32_1024_add_neon| | 46 EXPORT |vpx_idct32x32_1024_add_neon| |
| 47 ARM | 47 ARM |
| 48 REQUIRE8 | 48 REQUIRE8 |
| 49 PRESERVE8 | 49 PRESERVE8 |
| 50 | 50 |
| 51 AREA ||.text||, CODE, READONLY, ALIGN=2 | 51 AREA ||.text||, CODE, READONLY, ALIGN=2 |
| 52 | 52 |
| 53 AREA Block, CODE, READONLY | 53 AREA Block, CODE, READONLY |
| 54 | 54 |
| 55 ; -------------------------------------------------------------------------- | 55 ; -------------------------------------------------------------------------- |
| 56 ; Load from transposed_buffer | 56 ; Load from transposed_buffer |
| (...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 281 MEND | 281 MEND |
| 282 ; -------------------------------------------------------------------------- | 282 ; -------------------------------------------------------------------------- |
| 283 ; DO_BUTTERFLY_STD: invokes DO_BUTTERFLY with d28, d29, d26, d27 (the halves of q14 and q13) as its first four arguments. Touches q8-q12, q15 (q13-q14 are preserved) | 283 ; DO_BUTTERFLY_STD: invokes DO_BUTTERFLY with d28, d29, d26, d27 (the halves of q14 and q13) as its first four arguments. Touches q8-q12, q15 (q13-q14 are preserved) |
| 284 ; Valid output registers are anything but q8-q11; the six macro arguments are forwarded to DO_BUTTERFLY unchanged and in order | 284 ; Valid output registers are anything but q8-q11; the six macro arguments are forwarded to DO_BUTTERFLY unchanged and in order |
| 285 MACRO | 285 MACRO |
| 286 DO_BUTTERFLY_STD $first_constant, $second_constant, $reg1, $reg2, $reg3, $re
g4 | 286 DO_BUTTERFLY_STD $first_constant, $second_constant, $reg1, $reg2, $reg3, $re
g4 |
| 287 DO_BUTTERFLY d28, d29, d26, d27, $first_constant, $second_constant, $reg1, $
reg2, $reg3, $reg4 | 287 DO_BUTTERFLY d28, d29, d26, d27, $first_constant, $second_constant, $reg1, $
reg2, $reg3, $reg4 |
| 288 MEND | 288 MEND |
| 289 ; -------------------------------------------------------------------------- | 289 ; -------------------------------------------------------------------------- |
| 290 | 290 |
| 291 ;void vp9_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride
); | 291 ;void vpx_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride
); |
| 292 ; | 292 ; |
| 293 ; r0 int16_t *input, | 293 ; r0 int16_t *input, |
| 294 ; r1 uint8_t *dest, | 294 ; r1 uint8_t *dest, |
| 295 ; r2 int dest_stride) | 295 ; r2 int dest_stride) |
| 296 ; loop counters | 296 ; loop counters |
| 297 ; r4 bands loop counter | 297 ; r4 bands loop counter |
| 298 ; r5 pass loop counter | 298 ; r5 pass loop counter |
| 299 ; r8 transpose loop counter | 299 ; r8 transpose loop counter |
| 300 ; combine-add pointers | 300 ; combine-add pointers |
| 301 ; r6 dest + 31 * dest_stride, descending (30, 29, 28, ...) | 301 ; r6 dest + 31 * dest_stride, descending (30, 29, 28, ...) |
| 302 ; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...) | 302 ; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...) |
| 303 ; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...) | 303 ; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...) |
| 304 ; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...) | 304 ; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...) |
| 305 | 305 |
| 306 |vp9_idct32x32_1024_add_neon| PROC | 306 |vpx_idct32x32_1024_add_neon| PROC |
| 307 ; This function does one pass of idct32x32 transform. | 307 ; This function does one pass of idct32x32 transform. |
| 308 ; | 308 ; |
| 309 ; This is done by transposing the input and then doing a 1d transform on | 309 ; This is done by transposing the input and then doing a 1d transform on |
| 310 ; columns. In the first pass, the transposed columns are the original | 310 ; columns. In the first pass, the transposed columns are the original |
| 311 ; rows. In the second pass, after the transposition, the columns are the | 311 ; rows. In the second pass, after the transposition, the columns are the |
| 312 ; original columns. | 312 ; original columns. |
| 313 ; The 1d transform is done by looping over bands of eight columns (the | 313 ; The 1d transform is done by looping over bands of eight columns (the |
| 314 ; idct32_bands loop). For each band, the transform input transposition | 314 ; idct32_bands loop). For each band, the transform input transposition |
| 315 ; is done on demand, one band of four 8x8 matrices at a time. The four | 315 ; is done on demand, one band of four 8x8 matrices at a time. The four |
| 316 ; matrices are transposed by pairs (the idct32_transpose_pair loop). | 316 ; matrices are transposed by pairs (the idct32_transpose_pair loop). |
| (...skipping 971 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1288 | 1288 |
| 1289 ; bands loop processing | 1289 ; bands loop processing |
| 1290 subs r4, r4, #1 | 1290 subs r4, r4, #1 |
| 1291 bne idct32_bands_loop | 1291 bne idct32_bands_loop |
| 1292 | 1292 |
| 1293 ; stack operation | 1293 ; stack operation |
| 1294 add sp, sp, #512+2048+2048 | 1294 add sp, sp, #512+2048+2048 |
| 1295 vpop {d8-d15} | 1295 vpop {d8-d15} |
| 1296 pop {r4-r11} | 1296 pop {r4-r11} |
| 1297 bx lr | 1297 bx lr |
| 1298 ENDP ; |vp9_idct32x32_1024_add_neon| | 1298 ENDP ; |vpx_idct32x32_1024_add_neon| |
| 1299 END | 1299 END |
| OLD | NEW |