OLD | NEW |
| (Empty) |
1 ; | |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
3 ; | |
4 ; Use of this source code is governed by a BSD-style license | |
5 ; that can be found in the LICENSE file in the root of the source | |
6 ; tree. An additional intellectual property rights grant can be found | |
7 ; in the file PATENTS. All contributing project authors may | |
8 ; be found in the AUTHORS file in the root of the source tree. | |
9 ; | |
10 | |
11 | |
12 EXPORT |vp8_yv12_copy_frame_func_neon| | |
13 ARM | |
14 REQUIRE8 | |
15 PRESERVE8 | |
16 | |
17 INCLUDE vpx_scale_asm_offsets.asm | |
18 | |
19 AREA ||.text||, CODE, READONLY, ALIGN=2 | |
20 | |
21 ;void vp8_yv12_copy_frame_func_neon(const YV12_BUFFER_CONFIG *src_ybc, | |
22 ; YV12_BUFFER_CONFIG *dst_ybc); | |
23 | |
|vp8_yv12_copy_frame_func_neon| PROC
    ;-------------------------------------------------------------------
    ; Copies the Y, U and V planes of src_ybc into dst_ybc with NEON,
    ; two rows per iteration.
    ;
    ; In:     r0 = src_ybc, r1 = dst_ybc (both YV12_BUFFER_CONFIG *)
    ; Out:    nothing (void)
    ; Saved:  r4-r11, lr and d8-d15 are pushed on entry and restored on
    ;         exit (q4-q7, i.e. d8-d15, are used by the luma loop).
    ; Stack:  16 bytes of locals holding the chroma plane pointers
    ;           [sp]      src u_buffer     [sp, #4]   dst u_buffer
    ;           [sp, #8]  src v_buffer     [sp, #12]  dst v_buffer
    ;         sp is advanced by 8 after the U plane so that the same
    ;         [sp] / [sp, #4] loads pick up the V pointers.
    ;
    ; Register roles inside the copy loops:
    ;   r2/r3   src/dst pointer to the current pair of rows
    ;   r4/r5   plane height / width
    ;   r6/r7   src/dst stride
    ;   r8/r9   src/dst cursor in the first row of the pair
    ;   r10/r11 src/dst cursor in the second row (bulk loops)
    ;   r0/r11  src/dst cursor in the second row (tail loops)
    ;   r12     inner (width) counter, lr = outer (row pair) counter
    ;   r1      chroma planes still to copy (after the luma pass)
    ;
    ; Limits that follow from the code below:
    ;   - rows are copied in pairs (height >> 1), so an odd final row is
    ;     not copied; a plane with fewer than two rows is skipped;
    ;   - chroma height, width and both strides are taken as the luma
    ;     values >> 1; the uv_* fields of the structures are not read;
    ;   - tail columns are moved in 16-byte (luma) or 8-byte (chroma)
    ;     units, so a width that is not a multiple of that unit is
    ;     rounded up to the next whole unit.
    ;     NOTE(review): confirm the buffers leave room for that.
    ;-------------------------------------------------------------------
    push            {r4 - r11, lr}
    vpush           {d8 - d15}

    sub             sp, sp, #16             ; locals: four chroma pointers

    ; Chroma plane pointers, parked on the stack until the luma pass is done
    ldr             r8, [r0, #yv12_buffer_config_u_buffer]      ; src U
    ldr             r9, [r1, #yv12_buffer_config_u_buffer]      ; dst U
    ldr             r10, [r0, #yv12_buffer_config_v_buffer]     ; src V
    ldr             r11, [r1, #yv12_buffer_config_v_buffer]     ; dst V

    ; Luma geometry and plane pointers
    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             r5, [r0, #yv12_buffer_config_y_width]
    ldr             r6, [r0, #yv12_buffer_config_y_stride]      ; src stride
    ldr             r7, [r1, #yv12_buffer_config_y_stride]      ; dst stride
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]      ; src Y
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]      ; dst Y

    str             r8, [sp]
    str             r9, [sp, #4]
    str             r10, [sp, #8]
    str             r11, [sp, #12]

    ;Copy Y plane, two rows at one time
    movs            lr, r4, lsr #1          ; lr = number of row pairs
    beq             end_of_cp_src_to_dst    ; no full pair: a zero count would
                                            ; wrap the subs/bne loops below

cp_src_to_dst_height_loop
    mov             r8, r2                  ; src, first row of the pair
    mov             r9, r3                  ; dst, first row of the pair
    add             r10, r2, r6             ; src, second row of the pair
    add             r11, r3, r7             ; dst, second row of the pair
    movs            r12, r5, lsr #7         ; r12 = 128-byte chunks per row
    ; beq, not ble: movs with a shift leaves V untouched, so a signed
    ; condition here would depend on flags left over from the caller.
    beq             extra_cp_needed         ; y_width < 128

cp_src_to_dst_width_loop
    ; 128 bytes from each of the two rows per iteration
    vld1.8          {q0, q1}, [r8]!
    vld1.8          {q8, q9}, [r10]!
    vld1.8          {q2, q3}, [r8]!
    vld1.8          {q10, q11}, [r10]!
    vld1.8          {q4, q5}, [r8]!
    vld1.8          {q12, q13}, [r10]!
    vld1.8          {q6, q7}, [r8]!
    vld1.8          {q14, q15}, [r10]!

    subs            r12, r12, #1            ; flags consumed by the bne below;
                                            ; the NEON stores leave them alone

    vst1.8          {q0, q1}, [r9]!
    vst1.8          {q8, q9}, [r11]!
    vst1.8          {q2, q3}, [r9]!
    vst1.8          {q10, q11}, [r11]!
    vst1.8          {q4, q5}, [r9]!
    vst1.8          {q12, q13}, [r11]!
    vst1.8          {q6, q7}, [r9]!
    vst1.8          {q14, q15}, [r11]!

    bne             cp_src_to_dst_width_loop

    subs            lr, lr, #1
    add             r2, r2, r6, lsl #1      ; advance src by two rows
    add             r3, r3, r7, lsl #1      ; advance dst by two rows

    bne             cp_src_to_dst_height_loop

extra_cp_needed
    ands            r10, r5, #0x7f          ; r10 = y_width % 128 (tail bytes)
    sub             r11, r5, r10            ; r11 = bytes per row already copied
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]      ; src Y
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]      ; dst Y
    bne             extra_cp_src_to_dst_width   ; tests the ands result
end_of_cp_src_to_dst

    ;Copy U & V planes
    ldr             r2, [sp]                ; src U
    ldr             r3, [sp, #4]            ; dst U
    mov             r4, r4, lsr #1          ; uv height = y_height / 2
    mov             r5, r5, lsr #1          ; uv width  = y_width / 2
    mov             r6, r6, lsr #1          ; src uv stride = src y_stride / 2
    mov             r7, r7, lsr #1          ; dst uv stride = dst y_stride / 2

    mov             r1, #2                  ; two chroma planes: U, then V

cp_uv_loop

    ;copy two rows at one time
    movs            lr, r4, lsr #1          ; lr = number of row pairs
    beq             end_of_cp_src_to_dst_uv ; no full pair in this plane

cp_src_to_dst_height_uv_loop
    mov             r8, r2                  ; src, first row of the pair
    mov             r9, r3                  ; dst, first row of the pair
    add             r10, r2, r6             ; src, second row of the pair
    add             r11, r3, r7             ; dst, second row of the pair
    movs            r12, r5, lsr #6         ; r12 = 64-byte chunks per row
    beq             extra_uv_cp_needed      ; uv width < 64 (see beq note above)

cp_src_to_dst_width_uv_loop
    ; 64 bytes from each of the two rows per iteration
    vld1.8          {q0, q1}, [r8]!
    vld1.8          {q8, q9}, [r10]!
    vld1.8          {q2, q3}, [r8]!
    vld1.8          {q10, q11}, [r10]!

    subs            r12, r12, #1

    vst1.8          {q0, q1}, [r9]!
    vst1.8          {q8, q9}, [r11]!
    vst1.8          {q2, q3}, [r9]!
    vst1.8          {q10, q11}, [r11]!

    bne             cp_src_to_dst_width_uv_loop

    subs            lr, lr, #1
    add             r2, r2, r6, lsl #1      ; advance src by two rows
    add             r3, r3, r7, lsl #1      ; advance dst by two rows

    bne             cp_src_to_dst_height_uv_loop

extra_uv_cp_needed
    ands            r10, r5, #0x3f          ; r10 = uv width % 64 (tail bytes)
    sub             r11, r5, r10            ; r11 = bytes per row already copied
    ldr             r2, [sp]                ; src base of the current plane
    ldr             r3, [sp, #4]            ; dst base of the current plane
    bne             extra_cp_src_to_dst_uv_width    ; tests the ands result
end_of_cp_src_to_dst_uv

    subs            r1, r1, #1

    addne           sp, sp, #8              ; U done: step the locals to the V pair

    ldrne           r2, [sp]                ; src V
    ldrne           r3, [sp, #4]            ; dst V

    bne             cp_uv_loop

    add             sp, sp, #8              ; release the rest of the 16 bytes

    vpop            {d8 - d15}
    pop             {r4 - r11, pc}

;=============================
; Luma tail: copy the last (y_width % 128) bytes of every row pair.
; On entry r2/r3 = plane base, r10 = tail bytes, r11 = column offset.
; r0 is reused as a cursor here; src_ybc is not needed afterwards.
extra_cp_src_to_dst_width
    add             r2, r2, r11             ; skip the columns already copied
    add             r3, r3, r11

    mov             lr, r4, lsr #1          ; non-zero: checked before the Y pass
extra_cp_src_to_dst_height_loop
    mov             r8, r2                  ; src, first row of the pair
    mov             r9, r3                  ; dst, first row of the pair
    add             r0, r8, r6              ; src, second row of the pair
    add             r11, r9, r7             ; dst, second row of the pair

    mov             r12, r10                ; tail bytes left in this row pair

extra_cp_src_to_dst_width_loop
    vld1.8          {q0}, [r8]!
    vld1.8          {q1}, [r0]!

    subs            r12, r12, #16

    vst1.8          {q0}, [r9]!
    vst1.8          {q1}, [r11]!
    ; bgt rather than bne: also stops when the tail is not a multiple of 16
    bgt             extra_cp_src_to_dst_width_loop

    subs            lr, lr, #1

    add             r2, r2, r6, lsl #1      ; advance src by two rows
    add             r3, r3, r7, lsl #1      ; advance dst by two rows

    bne             extra_cp_src_to_dst_height_loop

    b               end_of_cp_src_to_dst

;=================================
; Chroma tail: copy the last (uv width % 64) bytes of every row pair.
; On entry r2/r3 = plane base, r10 = tail bytes, r11 = column offset.
extra_cp_src_to_dst_uv_width
    add             r2, r2, r11             ; skip the columns already copied
    add             r3, r3, r11

    mov             lr, r4, lsr #1          ; non-zero: checked at cp_uv_loop
extra_cp_src_to_dst_height_uv_loop
    mov             r8, r2                  ; src, first row of the pair
    mov             r9, r3                  ; dst, first row of the pair
    add             r0, r8, r6              ; src, second row of the pair
    add             r11, r9, r7             ; dst, second row of the pair

    mov             r12, r10                ; tail bytes left in this row pair

extra_cp_src_to_dst_width_uv_loop
    vld1.8          {d0}, [r8]!
    vld1.8          {d1}, [r0]!

    subs            r12, r12, #8

    vst1.8          {d0}, [r9]!
    vst1.8          {d1}, [r11]!
    ; bgt rather than bne: also stops when the tail is not a multiple of 8
    bgt             extra_cp_src_to_dst_width_uv_loop

    subs            lr, lr, #1

    add             r2, r2, r6, lsl #1      ; advance src by two rows
    add             r3, r3, r7, lsl #1      ; advance dst by two rows

    bne             extra_cp_src_to_dst_height_uv_loop

    b               end_of_cp_src_to_dst_uv

    ENDP
233 END | |
OLD | NEW |