;
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

    EXPORT  |vp8_dequant_dc_idct_add_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp8_dequant_dc_idct_add_neon(short *input, short *dq, unsigned char *pred,
;                                  unsigned char *dest, int pitch, int stride,
;                                  int Dc);
; r0    short *input,
; r1    short *dq,
; r2    unsigned char *pred
; r3    unsigned char *dest
; sp    int pitch
; sp+4  int stride
; sp+8  int Dc
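;
; For orientation, the routine behaves roughly like the C sketch below
; (an illustration only, not the project's reference C code; idct4x4 is a
; placeholder name for the 4x4 inverse transform that is implemented inline
; further down):
;
;   void dequant_dc_idct_add(short *input, short *dq, unsigned char *pred,
;                            unsigned char *dest, int pitch, int stride,
;                            int Dc)
;   {
;       short coeff[16];
;       int r, c, v;
;
;       for (r = 0; r < 16; r++)
;           coeff[r] = input[r] * dq[r];          /* dequantize */
;       coeff[0] = (short)Dc;                     /* DC is already dequantized */
;
;       idct4x4(coeff);                           /* in-place 4x4 inverse DCT,   */
;                                                 /* including the (x+4)>>3 step */
;
;       for (r = 0; r < 4; r++)
;           for (c = 0; c < 4; c++)
;           {
;               v = pred[r * pitch + c] + coeff[r * 4 + c];
;               dest[r * stride + c] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
;           }
;
;       memset(input, 0, 32);                     /* clear the coefficient block */
;   }
;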
|vp8_dequant_dc_idct_add_neon| PROC
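    ; q3..q4 = the 16 quantized coefficients, q5..q6 = the 16 dequant factors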
    vld1.16         {q3, q4}, [r0]
    vld1.16         {q5, q6}, [r1]

    ldr             r1, [sp, #8]            ;load Dc from stack

    ldr             r12, _CONSTANTS_

    vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
    vmul.i16        q2, q4, q6

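    ; replace the DC coefficient (lane d2[0]) with the already-dequantized Dc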
    vmov.16         d2[0], r1

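    ; load the 4x4 predictor, one 32-bit row per lane, advancing by pitch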
    ldr             r1, [sp]                ; pitch
    vld1.32         {d14[0]}, [r2], r1
    vld1.32         {d14[1]}, [r2], r1
    vld1.32         {d15[0]}, [r2], r1
    vld1.32         {d15[1]}, [r2]

    ldr             r1, [sp, #4]            ; stride

;|short_idct4x4llm_neon| PROC
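; first pass of the 4x4 inverse DCT: the four columns are processed in
; parallel, one column per 16-bit lane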
    vld1.16         {d0}, [r12]
    vswp            d3, d4                  ;q2(vp[4] vp[12])

    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q4, q2, d0[0]

    vqadd.s16       d12, d2, d3             ;a1
    vqsub.s16       d13, d2, d3             ;b1

    vshr.s16        q3, q3, #1
    vshr.s16        q4, q4, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q4, q4, q2

    vqsub.s16       d10, d6, d9             ;c1
    vqadd.s16       d11, d7, d8             ;d1

    vqadd.s16       d2, d12, d11
    vqadd.s16       d3, d13, d10
    vqsub.s16       d4, d13, d10
    vqsub.s16       d5, d12, d11

    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; memset(input, 0, 32) -- 32bytes
    vmov.i16        q14, #0

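    ; second pass: the same butterflies applied to the transposed data,
    ; interleaved with clearing the coefficient buffer (q14/q15)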
    vswp            d3, d4
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q4, q2, d0[0]

    vqadd.s16       d12, d2, d3             ;a1
    vqsub.s16       d13, d2, d3             ;b1

    vmov            q15, q14

    vshr.s16        q3, q3, #1
    vshr.s16        q4, q4, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q4, q4, q2

    vqsub.s16       d10, d6, d9             ;c1
    vqadd.s16       d11, d7, d8             ;d1

    vqadd.s16       d2, d12, d11
    vqadd.s16       d3, d13, d10
    vqsub.s16       d4, d13, d10
    vqsub.s16       d5, d12, d11

    vst1.16         {q14, q15}, [r0]

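    ; final scaling: rounding shift, equivalent to (x + 4) >> 3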
    vrshr.s16       d2, d2, #3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

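    ; reconstruct: add the residual to the widened predictor bytes, then
    ; saturate back to unsigned 8 bits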
    vaddw.u8        q1, q1, d14
    vaddw.u8        q2, q2, d15

    vqmovun.s16     d0, q1
    vqmovun.s16     d1, q2

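    ; store the four reconstructed rows to dest, advancing by stride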
    vst1.32         {d0[0]}, [r3], r1
    vst1.32         {d0[1]}, [r3], r1
    vst1.32         {d1[0]}, [r3], r1
    vst1.32         {d1[1]}, [r3]

    bx              lr

    ENDP            ; |vp8_dequant_dc_idct_add_neon|

; Constant Pool
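; Each 16-bit constant is stored twice per word so a single vld1.16 {d0} load
; provides both: d0[0]/d0[1] = cospi8sqrt2minus1 (0x4e7b = 20091) and
; d0[2]/d0[3] = sinpi8sqrt2 (0x8a8c = 35468 wrapped to 16 bits)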
_CONSTANTS_         DCD cospi8sqrt2minus1
cospi8sqrt2minus1   DCD 0x4e7b4e7b
sinpi8sqrt2         DCD 0x8a8c8a8c

    END