;
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_dequant_dc_idct_add_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; void vp8_dequant_dc_idct_add_neon(short *input, short *dq,
;                                   unsigned char *pred,
;                                   unsigned char *dest,
;                                   int pitch, int stride, int Dc);
;
; Dequantizes a 4x4 block of coefficients (input[i] * dq[i]), overrides
; coefficient 0 with Dc, runs two 1-D inverse-transform passes, adds the
; result to the 4x4 prediction block and stores the saturated 8-bit
; pixels to dest.  The 32 bytes at input are zeroed before returning.
;
; In (at entry):
;   r0      short *input          16 coefficients, overwritten with 0
;   r1      short *dq             16 dequantization factors
;   r2      unsigned char *pred   prediction, 4 rows of 4 bytes
;   r3      unsigned char *dest   destination, 4 rows of 4 bytes
;   [sp]    int pitch             byte step between pred rows
;   [sp,#4] int stride            byte step between dest rows
;   [sp,#8] int Dc                value written into coefficient 0
;
; Stack:    64 bytes, used to preserve d8-d15 (q4-q7), which AAPCS
;           requires a callee to keep intact.  After the vpush the
;           stack arguments therefore sit 64 bytes higher.
; Clobbers: r1, r2, r3, r12, q0-q3, q14, q15
;-----------------------------------------------------------------------
|vp8_dequant_dc_idct_add_neon| PROC
    vpush           {d8-d15}            ; q4-q7 are used as scratch below

    vld1.16         {q3, q4}, [r0]      ; 16 coefficients
    vld1.16         {q5, q6}, [r1]      ; 16 dequant factors

    ldr             r1, [sp, #72]       ; Dc: entry sp+8, plus 64 saved

    adr             r12, cospi8sqrt2minus1 ; PC-relative, no relocation

    vmul.i16        q1, q3, q5          ; dequantize: d2=c[0..3] d3=c[4..7]
    vmul.i16        q2, q4, q6          ;             d4=c[8..11] d5=c[12..15]

    vmov.16         d2[0], r1           ; coefficient 0 = Dc

    ldr             r1, [sp, #64]       ; pitch: entry sp+0, plus 64 saved
    vld1.32         {d14[0]}, [r2], r1  ; pred row 0
    vld1.32         {d14[1]}, [r2], r1  ; pred row 1
    vld1.32         {d15[0]}, [r2], r1  ; pred row 2
    vld1.32         {d15[1]}, [r2]      ; pred row 3

    ldr             r1, [sp, #68]       ; stride: entry sp+4, plus 64 saved

    ;-------------------------------------------------------------------
    ; First 1-D pass.
    ; d0 = { 0x4e7b, 0x4e7b, 0x8a8c, 0x8a8c }: lane 0 is
    ; cospi8sqrt2minus1, lane 2 is sinpi8sqrt2.
    ;
    ; vqdmulh returns the high half of the doubled product, so the
    ; following ">> 1" yields (x * k) >> 16.  Adding x afterwards gives
    ; x + ((x * cospi8sqrt2minus1) >> 16) for lane 0.  For lane 2 the
    ; constant 0x8a8c is negative as a signed 16-bit value (35468 - 65536),
    ; so adding x back produces (x * 35468) >> 16.
    ;-------------------------------------------------------------------
    vld1.16         {d0}, [r12]
    vswp            d3, d4              ; q1 = rows 0,2   q2 = rows 1,3

    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q4, q2, d0[0]

    vqadd.s16       d12, d2, d3         ; a1 = row0 + row2
    vqsub.s16       d13, d2, d3         ; b1 = row0 - row2

    vshr.s16        q3, q3, #1
    vshr.s16        q4, q4, #1

    vqadd.s16       q3, q3, q2          ; d6 = row1*sin  d7 = row3*sin
    vqadd.s16       q4, q4, q2          ; d8 = row1*cos  d9 = row3*cos

    vqsub.s16       d10, d6, d9         ; c1
    vqadd.s16       d11, d7, d8         ; d1

    vqadd.s16       d2, d12, d11        ; a1 + d1
    vqadd.s16       d3, d13, d10        ; b1 + c1
    vqsub.s16       d4, d13, d10        ; b1 - c1
    vqsub.s16       d5, d12, d11        ; a1 - d1

    vtrn.32         d2, d4              ; 4x4 transpose of 16-bit lanes
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ;-------------------------------------------------------------------
    ; Second 1-D pass, same arithmetic on the transposed block.  The
    ; zero registers for clearing input are prepared in between.
    ;-------------------------------------------------------------------
    vmov.i16        q14, #0             ; memset(input, 0, 32), part 1

    vswp            d3, d4              ; q1 = rows 0,2   q2 = rows 1,3
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q4, q2, d0[0]

    vqadd.s16       d12, d2, d3         ; a1
    vqsub.s16       d13, d2, d3         ; b1

    vmov            q15, q14            ; memset(input, 0, 32), part 2

    vshr.s16        q3, q3, #1
    vshr.s16        q4, q4, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q4, q4, q2

    vqsub.s16       d10, d6, d9         ; c1
    vqadd.s16       d11, d7, d8         ; d1

    vqadd.s16       d2, d12, d11        ; a1 + d1
    vqadd.s16       d3, d13, d10        ; b1 + c1
    vqsub.s16       d4, d13, d10        ; b1 - c1
    vqsub.s16       d5, d12, d11        ; a1 - d1

    vst1.16         {q14, q15}, [r0]    ; clear the 16 input coefficients

    vrshr.s16       d2, d2, #3          ; (x + 4) >> 3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    vtrn.32         d2, d4              ; transpose back
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ;-------------------------------------------------------------------
    ; Add the prediction, saturate to unsigned 8-bit, store 4 rows.
    ;-------------------------------------------------------------------
    vaddw.u8        q1, q1, d14         ; rows 0,1 += pred rows 0,1
    vaddw.u8        q2, q2, d15         ; rows 2,3 += pred rows 2,3

    vqmovun.s16     d0, q1
    vqmovun.s16     d1, q2

    vst1.32         {d0[0]}, [r3], r1   ; dest row 0
    vst1.32         {d0[1]}, [r3], r1   ; dest row 1
    vst1.32         {d1[0]}, [r3], r1   ; dest row 2
    vst1.32         {d1[1]}, [r3]       ; dest row 3

    vpop            {d8-d15}            ; restore callee-saved registers
    bx              lr

    ENDP            ; |vp8_dequant_dc_idct_add_neon|

; Constant Pool
; Each word holds the same 16-bit value twice; the routine loads both
; words with one 64-bit vld1.16 and addresses them as lanes 0 and 2.
; sinpi8sqrt2 must stay directly after cospi8sqrt2minus1.
cospi8sqrt2minus1 DCD 0x4e7b4e7b        ; 0x4e7b = 20091
sinpi8sqrt2       DCD 0x8a8c8a8c        ; 0x8a8c = 35468

    END