| OLD | NEW | 
|     1 ; |     1 ; | 
|     2 ;  Copyright (c) 2013 The WebM project authors. All Rights Reserved. |     2 ;  Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 
|     3 ; |     3 ; | 
|     4 ;  Use of this source code is governed by a BSD-style license |     4 ;  Use of this source code is governed by a BSD-style license | 
|     5 ;  that can be found in the LICENSE file in the root of the source |     5 ;  that can be found in the LICENSE file in the root of the source | 
|     6 ;  tree. An additional intellectual property rights grant can be found |     6 ;  tree. An additional intellectual property rights grant can be found | 
|     7 ;  in the file PATENTS.  All contributing project authors may |     7 ;  in the file PATENTS.  All contributing project authors may | 
|     8 ;  be found in the AUTHORS file in the root of the source tree. |     8 ;  be found in the AUTHORS file in the root of the source tree. | 
|     9 ; |     9 ; | 
|    10  |    10  | 
| (...skipping 25 matching lines...) Expand all  Loading... | 
|    36 cospi_24_64 EQU  6270 |    36 cospi_24_64 EQU  6270 | 
|    37 cospi_25_64 EQU  5520 |    37 cospi_25_64 EQU  5520 | 
|    38 cospi_26_64 EQU  4756 |    38 cospi_26_64 EQU  4756 | 
|    39 cospi_27_64 EQU  3981 |    39 cospi_27_64 EQU  3981 | 
|    40 cospi_28_64 EQU  3196 |    40 cospi_28_64 EQU  3196 | 
|    41 cospi_29_64 EQU  2404 |    41 cospi_29_64 EQU  2404 | 
|    42 cospi_30_64 EQU  1606 |    42 cospi_30_64 EQU  1606 | 
|    43 cospi_31_64 EQU   804 |    43 cospi_31_64 EQU   804 | 
|    44  |    44  | 
|    45  |    45  | 
|    46     EXPORT  |vp9_idct32x32_1024_add_neon| |    46     EXPORT  |vpx_idct32x32_1024_add_neon| | 
|    47     ARM |    47     ARM | 
|    48     REQUIRE8 |    48     REQUIRE8 | 
|    49     PRESERVE8 |    49     PRESERVE8 | 
|    50  |    50  | 
|    51     AREA ||.text||, CODE, READONLY, ALIGN=2 |    51     AREA ||.text||, CODE, READONLY, ALIGN=2 | 
|    52  |    52  | 
|    53     AREA     Block, CODE, READONLY |    53     AREA     Block, CODE, READONLY | 
|    54  |    54  | 
|    55     ; -------------------------------------------------------------------------- |    55     ; -------------------------------------------------------------------------- | 
|    56     ; Load from transposed_buffer |    56     ; Load from transposed_buffer | 
| (...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|   281     MEND |   281     MEND | 
|   282     ; -------------------------------------------------------------------------- |   282     ; -------------------------------------------------------------------------- | 
|   283     ; Touches q8-q12, q15 (q13-q14 are preserved) |   283     ; Touches q8-q12, q15 (q13-q14 are preserved) | 
|   284     ; valid output registers are anything but q8-q11 |   284     ; valid output registers are anything but q8-q11 | 
|   285     MACRO |   285     MACRO | 
|   286     DO_BUTTERFLY_STD $first_constant, $second_constant, $reg1, $reg2, $reg3, $re
      g4 |   286     DO_BUTTERFLY_STD $first_constant, $second_constant, $reg1, $reg2, $reg3, $re
      g4 | 
|   287     DO_BUTTERFLY d28, d29, d26, d27, $first_constant, $second_constant, $reg1, $
      reg2, $reg3, $reg4 |   287     DO_BUTTERFLY d28, d29, d26, d27, $first_constant, $second_constant, $reg1, $
      reg2, $reg3, $reg4 | 
|   288     MEND |   288     MEND | 
|   289     ; -------------------------------------------------------------------------- |   289     ; -------------------------------------------------------------------------- | 
|   290  |   290  | 
|   291 ;void vp9_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride); |   291 ;void vpx_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride); | 
|   292 ; |   292 ; | 
|   293 ;   r0  int16_t *input, |   293 ;   r0  int16_t *input, | 
|   294 ;   r1  uint8_t *dest, |   294 ;   r1  uint8_t *dest, | 
|   295 ;   r2  int dest_stride) |   295 ;   r2  int dest_stride) | 
|   296 ; loop counters |   296 ; loop counters | 
|   297 ;   r4  bands loop counter |   297 ;   r4  bands loop counter | 
|   298 ;   r5  pass loop counter |   298 ;   r5  pass loop counter | 
|   299 ;   r8  transpose loop counter |   299 ;   r8  transpose loop counter | 
|   300 ; combine-add pointers |   300 ; combine-add pointers | 
|   301 ;   r6  dest + 31 * dest_stride, descending (30, 29, 28, ...) |   301 ;   r6  dest + 31 * dest_stride, descending (30, 29, 28, ...) | 
|   302 ;   r7  dest +  0 * dest_stride, ascending  (1, 2, 3, ...) |   302 ;   r7  dest +  0 * dest_stride, ascending  (1, 2, 3, ...) | 
|   303 ;   r9  dest + 15 * dest_stride, descending (14, 13, 12, ...) |   303 ;   r9  dest + 15 * dest_stride, descending (14, 13, 12, ...) | 
|   304 ;   r10 dest + 16 * dest_stride, ascending  (17, 18, 19, ...) |   304 ;   r10 dest + 16 * dest_stride, ascending  (17, 18, 19, ...) | 
|   305  |   305  | 
|   306 |vp9_idct32x32_1024_add_neon| PROC |   306 |vpx_idct32x32_1024_add_neon| PROC | 
|   307     ; This function does one pass of idct32x32 transform. |   307     ; This function does one pass of idct32x32 transform. | 
|   308     ; |   308     ; | 
|   309     ; This is done by transposing the input and then doing a 1d transform on |   309     ; This is done by transposing the input and then doing a 1d transform on | 
|   310     ; columns. In the first pass, the transposed columns are the original |   310     ; columns. In the first pass, the transposed columns are the original | 
|   311     ; rows. In the second pass, after the transposition, the columns are the |   311     ; rows. In the second pass, after the transposition, the columns are the | 
|   312     ; original columns. |   312     ; original columns. | 
|   313     ; The 1d transform is done by looping over bands of eight columns (the |   313     ; The 1d transform is done by looping over bands of eight columns (the | 
|   314     ; idct32_bands loop). For each band, the transform input transposition |   314     ; idct32_bands loop). For each band, the transform input transposition | 
|   315     ; is done on demand, one band of four 8x8 matrices at a time. The four |   315     ; is done on demand, one band of four 8x8 matrices at a time. The four | 
|   316     ; matrices are transposed by pairs (the idct32_transpose_pair loop). |   316     ; matrices are transposed by pairs (the idct32_transpose_pair loop). | 
| (...skipping 971 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  1288  |  1288  | 
|  1289     ; bands loop processing |  1289     ; bands loop processing | 
|  1290     subs r4, r4, #1 |  1290     subs r4, r4, #1 | 
|  1291     bne idct32_bands_loop |  1291     bne idct32_bands_loop | 
|  1292  |  1292  | 
|  1293     ; stack operation |  1293     ; stack operation | 
|  1294     add sp, sp, #512+2048+2048 |  1294     add sp, sp, #512+2048+2048 | 
|  1295     vpop {d8-d15} |  1295     vpop {d8-d15} | 
|  1296     pop  {r4-r11} |  1296     pop  {r4-r11} | 
|  1297     bx              lr |  1297     bx              lr | 
|  1298     ENDP  ; |vp9_idct32x32_1024_add_neon| |  1298     ENDP  ; |vpx_idct32x32_1024_add_neon| | 
|  1299     END |  1299     END | 
| OLD | NEW |