| OLD | NEW |
| (Empty) |
| 1 ; | |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
| 3 ; | |
| 4 ; Use of this source code is governed by a BSD-style license | |
| 5 ; that can be found in the LICENSE file in the root of the source | |
| 6 ; tree. An additional intellectual property rights grant can be found | |
| 7 ; in the file PATENTS. All contributing project authors may | |
| 8 ; be found in the AUTHORS file in the root of the source tree. | |
| 9 ; | |
| 10 | |
| 11 | |
| 12 EXPORT |vp8_dequant_idct_add_neon| | |
| 13 ARM | |
| 14 REQUIRE8 | |
| 15 PRESERVE8 | |
| 16 | |
| 17 AREA ||.text||, CODE, READONLY, ALIGN=2 | |
;-----------------------------------------------------------------------
; void vp8_dequant_idct_add_neon(short *input, short *dq,
;                                unsigned char *dest, int stride)
;
; Dequantizes a 4x4 block of 16-bit coefficients (input[i] * dq[i]),
; runs the two-pass 4x4 inverse transform on the products, adds the
; rounded result to the 4x4 block of bytes at dest and writes the
; unsigned-saturated sums back to dest. The 32 bytes at input are
; cleared to zero.
;
; In:    r0 = short *input         16 coefficients, overwritten with 0
;        r1 = short *dq            16 dequantization factors
;        r2 = unsigned char *dest  4 rows of 4 bytes, read and written
;        r3 = int stride           byte distance between rows of dest
; Out:   none
; Clobb: r1-r3, r12, q0-q3, q14, q15
; Saved: d8-d15 (q4-q7) are used as scratch, so they are pushed on entry
;        and popped before returning; the AAPCS makes them callee-saved.
; Stack: 64 bytes (keeps sp 8-byte aligned, as PRESERVE8 promises)
;-----------------------------------------------------------------------

|vp8_dequant_idct_add_neon| PROC
    vpush           {d8-d15}            ; q4-q7 must survive this call

    vld1.16         {q3, q4}, [r0]      ; 16 coefficients
    vld1.16         {q5, q6}, [r1]      ; 16 dequantization factors

    add             r1, r2, r3          ; r1 = dest + stride
    lsl             r3, #1              ; 2x stride

    ; Gather the destination block: d14 = rows 0 and 1, d15 = rows 2 and 3.
    ; r2 walks the even rows, r1 the odd rows.
    vld1.32         {d14[0]}, [r2], r3
    vld1.32         {d14[1]}, [r1], r3
    vld1.32         {d15[0]}, [r2]
    vld1.32         {d15[1]}, [r1]

    adr             r12, cospi8sqrt2minus1  ; pointer to the first constant

    ; Dequantize: d2..d5 = rows 0..3 of input * dq.
    vmul.i16        q1, q3, q5          ; input for short_idct4x4llm_neon
    vmul.i16        q2, q4, q6

;|short_idct4x4llm_neon| PROC
    ; d0[0] = cospi8sqrt2minus1, d0[2] = sinpi8sqrt2 (8 bytes span both words)
    vld1.16         {d0}, [r12]
    vswp            d3, d4              ; q1 = rows 0,2   q2 = rows 1,3

    ; ---- first pass: combines rows 0..3 lane by lane ----
    ; vqdmulh yields (2 * x * k) >> 16; the vshr #1 below removes the
    ; doubling and adding x back (vqadd with q2) makes the effective
    ; factor 1 + k / 65536, with k read as a signed 16-bit value.
    vqdmulh.s16     q3, q2, d0[2]       ; sinpi8sqrt2 terms
    vqdmulh.s16     q4, q2, d0[0]       ; cospi8sqrt2minus1 terms

    vqadd.s16       d12, d2, d3         ;a1
    vqsub.s16       d13, d2, d3         ;b1

    vshr.s16        q3, q3, #1
    vshr.s16        q4, q4, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q4, q4, q2

    vqsub.s16       d10, d6, d9         ;c1
    vqadd.s16       d11, d7, d8         ;d1

    vqadd.s16       d2, d12, d11        ; a1 + d1
    vqadd.s16       d3, d13, d10        ; b1 + c1
    vqsub.s16       d4, d13, d10        ; b1 - c1
    vqsub.s16       d5, d12, d11        ; a1 - d1

    ; Transpose the 4x4 block held in d2..d5 so the second pass works
    ; along the other dimension.
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; memset(input, 0, 32) -- 32bytes
    ; (zero registers are prepared here, stored further down)
    vmov.i16        q14, #0

    ; ---- second pass: same butterfly on the transposed data ----
    vswp            d3, d4
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q4, q2, d0[0]

    vqadd.s16       d12, d2, d3         ;a1
    vqsub.s16       d13, d2, d3         ;b1

    vmov            q15, q14

    vshr.s16        q3, q3, #1
    vshr.s16        q4, q4, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q4, q4, q2

    vqsub.s16       d10, d6, d9         ;c1
    vqadd.s16       d11, d7, d8         ;d1

    vqadd.s16       d2, d12, d11
    vqadd.s16       d3, d13, d10
    vqsub.s16       d4, d13, d10
    vqsub.s16       d5, d12, d11

    vst1.16         {q14, q15}, [r0]    ; clear the 16 input coefficients

    ; Rounding shift: (x + 4) >> 3
    vrshr.s16       d2, d2, #3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    ; Transpose back so d2..d5 line up with the rows held in d14/d15.
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; Add the residual to the widened destination pixels.
    vaddw.u8        q1, q1, d14
    vaddw.u8        q2, q2, d15

    ; The loads advanced r2 and r1 by 2 * stride; rewind to rows 0 and 1.
    sub             r2, r2, r3
    sub             r1, r1, r3

    ; Narrow to bytes with unsigned saturation.
    vqmovun.s16     d0, q1
    vqmovun.s16     d1, q2

    vst1.32         {d0[0]}, [r2], r3
    vst1.32         {d0[1]}, [r1], r3
    vst1.32         {d1[0]}, [r2]
    vst1.32         {d1[1]}, [r1]

    vpop            {d8-d15}            ; restore callee-saved q4-q7
    bx              lr

    ENDP            ; |vp8_dequant_idct_add_neon|

; Constant Pool
; Each word repeats one 16-bit constant: 0x4e7b = 20091 and 0x8a8c = 35468
; (-30068 when read as signed 16-bit). The two words are fetched with a
; single 8-byte vld1.16 {d0}, so they must stay adjacent and in this order.
cospi8sqrt2minus1 DCD 0x4e7b4e7b
sinpi8sqrt2       DCD 0x8a8c8a8c
| 130 | |
| 131 END | |
| OLD | NEW |