OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
11 EXPORT |vp9_idct8x8_64_add_neon| | 11 EXPORT |vpx_idct8x8_64_add_neon| |
12 EXPORT |vp9_idct8x8_12_add_neon| | 12 EXPORT |vpx_idct8x8_12_add_neon| |
13 ARM | 13 ARM |
14 REQUIRE8 | 14 REQUIRE8 |
15 PRESERVE8 | 15 PRESERVE8 |
16 | 16 |
17 AREA ||.text||, CODE, READONLY, ALIGN=2 | 17 AREA ||.text||, CODE, READONLY, ALIGN=2 |
18 | 18 |
19 ; Parallel 1D IDCT on all the columns of an 8x8 16-bit data matrix which is | 19 ; Parallel 1D IDCT on all the columns of an 8x8 16-bit data matrix which is |
20 ; loaded in q8-q15. The output will be stored back into q8-q15 registers. | 20 ; loaded in q8-q15. The output will be stored back into q8-q15 registers. |
21 ; This macro will touch q0-q7 registers and use them as buffer during | 21 ; This macro will touch q0-q7 registers and use them as buffer during |
22 ; calculation. | 22 ; calculation. |
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
191 vtrn.32 q9, q11 | 191 vtrn.32 q9, q11 |
192 vtrn.32 q12, q14 | 192 vtrn.32 q12, q14 |
193 vtrn.32 q13, q15 | 193 vtrn.32 q13, q15 |
194 vtrn.16 q8, q9 | 194 vtrn.16 q8, q9 |
195 vtrn.16 q10, q11 | 195 vtrn.16 q10, q11 |
196 vtrn.16 q12, q13 | 196 vtrn.16 q12, q13 |
197 vtrn.16 q14, q15 | 197 vtrn.16 q14, q15 |
198 MEND | 198 MEND |
199 | 199 |
200 AREA Block, CODE, READONLY ; name this block of code | 200 AREA Block, CODE, READONLY ; name this block of code |
201 ;void vp9_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) | 201 ;void vpx_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) |
202 ; | 202 ; |
203 ; r0 int16_t *input | 203 ; r0 int16_t *input |
204 ; r1 uint8_t *dest | 204 ; r1 uint8_t *dest |
205 ; r2 int dest_stride | 205 ; r2 int dest_stride |
206 | 206 |
207 |vp9_idct8x8_64_add_neon| PROC | 207 |vpx_idct8x8_64_add_neon| PROC |
208 push {r4-r9} | 208 push {r4-r9} |
209 vpush {d8-d15} | 209 vpush {d8-d15} |
210 vld1.s16 {q8,q9}, [r0]! | 210 vld1.s16 {q8,q9}, [r0]! |
211 vld1.s16 {q10,q11}, [r0]! | 211 vld1.s16 {q10,q11}, [r0]! |
212 vld1.s16 {q12,q13}, [r0]! | 212 vld1.s16 {q12,q13}, [r0]! |
213 vld1.s16 {q14,q15}, [r0]! | 213 vld1.s16 {q14,q15}, [r0]! |
214 | 214 |
215 ; transpose the input data | 215 ; transpose the input data |
216 TRANSPOSE8X8 | 216 TRANSPOSE8X8 |
217 | 217 |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
301 vst1.64 {d2}, [r0], r2 | 301 vst1.64 {d2}, [r0], r2 |
302 vst1.64 {d3}, [r0], r2 | 302 vst1.64 {d3}, [r0], r2 |
303 vst1.64 {d4}, [r0], r2 | 303 vst1.64 {d4}, [r0], r2 |
304 vst1.64 {d5}, [r0], r2 | 304 vst1.64 {d5}, [r0], r2 |
305 vst1.64 {d6}, [r0], r2 | 305 vst1.64 {d6}, [r0], r2 |
306 vst1.64 {d7}, [r0], r2 | 306 vst1.64 {d7}, [r0], r2 |
307 | 307 |
308 vpop {d8-d15} | 308 vpop {d8-d15} |
309 pop {r4-r9} | 309 pop {r4-r9} |
310 bx lr | 310 bx lr |
311 ENDP ; |vp9_idct8x8_64_add_neon| | 311 ENDP ; |vpx_idct8x8_64_add_neon| |
312 | 312 |
313 ;void vp9_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) | 313 ;void vpx_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) |
314 ; | 314 ; |
315 ; r0 int16_t *input | 315 ; r0 int16_t *input |
316 ; r1 uint8_t *dest | 316 ; r1 uint8_t *dest |
317 ; r2 int dest_stride | 317 ; r2 int dest_stride |
318 | 318 |
319 |vp9_idct8x8_12_add_neon| PROC | 319 |vpx_idct8x8_12_add_neon| PROC |
320 push {r4-r9} | 320 push {r4-r9} |
321 vpush {d8-d15} | 321 vpush {d8-d15} |
322 vld1.s16 {q8,q9}, [r0]! | 322 vld1.s16 {q8,q9}, [r0]! |
323 vld1.s16 {q10,q11}, [r0]! | 323 vld1.s16 {q10,q11}, [r0]! |
324 vld1.s16 {q12,q13}, [r0]! | 324 vld1.s16 {q12,q13}, [r0]! |
325 vld1.s16 {q14,q15}, [r0]! | 325 vld1.s16 {q14,q15}, [r0]! |
326 | 326 |
327 ; transpose the input data | 327 ; transpose the input data |
328 TRANSPOSE8X8 | 328 TRANSPOSE8X8 |
329 | 329 |
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
507 vst1.64 {d2}, [r0], r2 | 507 vst1.64 {d2}, [r0], r2 |
508 vst1.64 {d3}, [r0], r2 | 508 vst1.64 {d3}, [r0], r2 |
509 vst1.64 {d4}, [r0], r2 | 509 vst1.64 {d4}, [r0], r2 |
510 vst1.64 {d5}, [r0], r2 | 510 vst1.64 {d5}, [r0], r2 |
511 vst1.64 {d6}, [r0], r2 | 511 vst1.64 {d6}, [r0], r2 |
512 vst1.64 {d7}, [r0], r2 | 512 vst1.64 {d7}, [r0], r2 |
513 | 513 |
514 vpop {d8-d15} | 514 vpop {d8-d15} |
515 pop {r4-r9} | 515 pop {r4-r9} |
516 bx lr | 516 bx lr |
517 ENDP ; |vp9_idct8x8_12_add_neon| | 517 ENDP ; |vpx_idct8x8_12_add_neon| |
518 | 518 |
519 END | 519 END |
OLD | NEW |