| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 EXPORT |vp9_idct8x8_64_add_neon| | 11 EXPORT |vpx_idct8x8_64_add_neon| |
| 12 EXPORT |vp9_idct8x8_12_add_neon| | 12 EXPORT |vpx_idct8x8_12_add_neon| |
| 13 ARM | 13 ARM |
| 14 REQUIRE8 | 14 REQUIRE8 |
| 15 PRESERVE8 | 15 PRESERVE8 |
| 16 | 16 |
| 17 AREA ||.text||, CODE, READONLY, ALIGN=2 | 17 AREA ||.text||, CODE, READONLY, ALIGN=2 |
| 18 | 18 |
| 19 ; Parallel 1D IDCT on all the columns of a 8x8 16bit data matrix which are | 19 ; Parallel 1D IDCT on all the columns of a 8x8 16bit data matrix which are |
| 20 ; loaded in q8-q15. The output will be stored back into q8-q15 registers. | 20 ; loaded in q8-q15. The output will be stored back into q8-q15 registers. |
| 21 ; This macro will touch q0-q7 registers and use them as buffer during | 21 ; This macro will touch q0-q7 registers and use them as buffer during |
| 22 ; calculation. | 22 ; calculation. |
| (...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 191 vtrn.32 q9, q11 | 191 vtrn.32 q9, q11 |
| 192 vtrn.32 q12, q14 | 192 vtrn.32 q12, q14 |
| 193 vtrn.32 q13, q15 | 193 vtrn.32 q13, q15 |
| 194 vtrn.16 q8, q9 | 194 vtrn.16 q8, q9 |
| 195 vtrn.16 q10, q11 | 195 vtrn.16 q10, q11 |
| 196 vtrn.16 q12, q13 | 196 vtrn.16 q12, q13 |
| 197 vtrn.16 q14, q15 | 197 vtrn.16 q14, q15 |
| 198 MEND | 198 MEND |
| 199 | 199 |
| 200 AREA Block, CODE, READONLY ; name this block of code | 200 AREA Block, CODE, READONLY ; name this block of code |
| 201 ;void vp9_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) | 201 ;void vpx_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) |
| 202 ; | 202 ; |
| 203 ; r0 int16_t *input | 203 ; r0 int16_t *input |
| 204 ; r1 uint8_t *dest | 204 ; r1 uint8_t *dest |
| 205 ; r2 int dest_stride | 205 ; r2 int dest_stride |
| 206 | 206 |
| 207 |vp9_idct8x8_64_add_neon| PROC | 207 |vpx_idct8x8_64_add_neon| PROC |
| 208 push {r4-r9} | 208 push {r4-r9} |
| 209 vpush {d8-d15} | 209 vpush {d8-d15} |
| 210 vld1.s16 {q8,q9}, [r0]! | 210 vld1.s16 {q8,q9}, [r0]! |
| 211 vld1.s16 {q10,q11}, [r0]! | 211 vld1.s16 {q10,q11}, [r0]! |
| 212 vld1.s16 {q12,q13}, [r0]! | 212 vld1.s16 {q12,q13}, [r0]! |
| 213 vld1.s16 {q14,q15}, [r0]! | 213 vld1.s16 {q14,q15}, [r0]! |
| 214 | 214 |
| 215 ; transpose the input data | 215 ; transpose the input data |
| 216 TRANSPOSE8X8 | 216 TRANSPOSE8X8 |
| 217 | 217 |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 301 vst1.64 {d2}, [r0], r2 | 301 vst1.64 {d2}, [r0], r2 |
| 302 vst1.64 {d3}, [r0], r2 | 302 vst1.64 {d3}, [r0], r2 |
| 303 vst1.64 {d4}, [r0], r2 | 303 vst1.64 {d4}, [r0], r2 |
| 304 vst1.64 {d5}, [r0], r2 | 304 vst1.64 {d5}, [r0], r2 |
| 305 vst1.64 {d6}, [r0], r2 | 305 vst1.64 {d6}, [r0], r2 |
| 306 vst1.64 {d7}, [r0], r2 | 306 vst1.64 {d7}, [r0], r2 |
| 307 | 307 |
| 308 vpop {d8-d15} | 308 vpop {d8-d15} |
| 309 pop {r4-r9} | 309 pop {r4-r9} |
| 310 bx lr | 310 bx lr |
| 311 ENDP ; |vp9_idct8x8_64_add_neon| | 311 ENDP ; |vpx_idct8x8_64_add_neon| |
| 312 | 312 |
| 313 ;void vp9_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) | 313 ;void vpx_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) |
| 314 ; | 314 ; |
| 315 ; r0 int16_t *input | 315 ; r0 int16_t *input |
| 316 ; r1 uint8_t *dest | 316 ; r1 uint8_t *dest |
| 317 ; r2 int dest_stride | 317 ; r2 int dest_stride |
| 318 | 318 |
| 319 |vp9_idct8x8_12_add_neon| PROC | 319 |vpx_idct8x8_12_add_neon| PROC |
| 320 push {r4-r9} | 320 push {r4-r9} |
| 321 vpush {d8-d15} | 321 vpush {d8-d15} |
| 322 vld1.s16 {q8,q9}, [r0]! | 322 vld1.s16 {q8,q9}, [r0]! |
| 323 vld1.s16 {q10,q11}, [r0]! | 323 vld1.s16 {q10,q11}, [r0]! |
| 324 vld1.s16 {q12,q13}, [r0]! | 324 vld1.s16 {q12,q13}, [r0]! |
| 325 vld1.s16 {q14,q15}, [r0]! | 325 vld1.s16 {q14,q15}, [r0]! |
| 326 | 326 |
| 327 ; transpose the input data | 327 ; transpose the input data |
| 328 TRANSPOSE8X8 | 328 TRANSPOSE8X8 |
| 329 | 329 |
| (...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 507 vst1.64 {d2}, [r0], r2 | 507 vst1.64 {d2}, [r0], r2 |
| 508 vst1.64 {d3}, [r0], r2 | 508 vst1.64 {d3}, [r0], r2 |
| 509 vst1.64 {d4}, [r0], r2 | 509 vst1.64 {d4}, [r0], r2 |
| 510 vst1.64 {d5}, [r0], r2 | 510 vst1.64 {d5}, [r0], r2 |
| 511 vst1.64 {d6}, [r0], r2 | 511 vst1.64 {d6}, [r0], r2 |
| 512 vst1.64 {d7}, [r0], r2 | 512 vst1.64 {d7}, [r0], r2 |
| 513 | 513 |
| 514 vpop {d8-d15} | 514 vpop {d8-d15} |
| 515 pop {r4-r9} | 515 pop {r4-r9} |
| 516 bx lr | 516 bx lr |
| 517 ENDP ; |vp9_idct8x8_12_add_neon| | 517 ENDP ; |vpx_idct8x8_12_add_neon| |
| 518 | 518 |
| 519 END | 519 END |
| OLD | NEW |