OLD | NEW |
| (Empty) |
1 ; | |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
3 ; | |
4 ; Use of this source code is governed by a BSD-style license | |
5 ; that can be found in the LICENSE file in the root of the source | |
6 ; tree. An additional intellectual property rights grant can be found | |
7 ; in the file PATENTS. All contributing project authors may | |
8 ; be found in the AUTHORS file in the root of the source tree. | |
9 ; | |
10 | |
11 | |
; Make the routine visible to the linker under its C name.
12 EXPORT |vp8_yv12_extend_frame_borders_neon| | |
; Assemble as ARM (not Thumb) instructions.
13 ARM | |
; armasm 8-byte stack alignment attributes: this code requires and preserves it.
14 REQUIRE8 | |
15 PRESERVE8 | |
16 | |
; Presumably supplies the yv12_buffer_config_* field offsets used by the
; ldr instructions in the routine below -- confirm against the generated file.
17 INCLUDE vpx_scale_asm_offsets.asm | |
18 | |
19 AREA ||.text||, CODE, READONLY, ALIGN=2 | |
20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf); | |
21 ; we depend on VP8BORDERINPIXELS being 32 (the code hard-codes a border of 32 for Y and 16 for U/V) | |
22 | |
;-----------------------------------------------------------------------
; void vp8_yv12_extend_frame_borders_neon(YV12_BUFFER_CONFIG *ybf)
;
; Replicates the edge pixels of the Y, U and V planes into the border
; area around each plane: 32 columns/rows for Y, 16 columns/rows for U, V.
; For each plane the left/right borders are filled first, then the top
; and bottom borders are filled from the (already extended) first and
; last rows, copying plane_stride bytes per border row.
;
; In:    r0 = ybf; fields are read through the yv12_buffer_config_* offsets
; Saved: r4-r10, lr and d8-d15 are pushed on entry and restored on exit
;
; NOTE(review): the row loops run plane_height/4 (Y) and plane_height/8
; (U, V) times and only test the counter after decrementing it, so a
; height below 4 / 8 wraps the counter and rows past a multiple of 4 / 8
; are not processed -- confirm callers guarantee suitable heights.
; NOTE(review): the top/bottom copies move plane_stride bytes in 16-byte
; (Y) or 8-byte (U, V) units; a remainder smaller than that unit is not
; copied -- confirm the strides are always multiples of 16 / 8.
;-----------------------------------------------------------------------
23 |vp8_yv12_extend_frame_borders_neon| PROC | |
24 push {r4 - r10, lr} | |
25 vpush {d8 - d15} | |
26 | |
27 ; Border = 32 | |
28 ldr r3, [r0, #yv12_buffer_config_y_width] ; plane_width | |
29 ldr r1, [r0, #yv12_buffer_config_y_buffer] ; src_ptr1 | |
30 ldr r4, [r0, #yv12_buffer_config_y_height] ; plane_height | |
31 ldr lr, [r0, #yv12_buffer_config_y_stride] ; plane_stride | |
32 | |
33 ; Border copy for Y plane | |
34 ; copy the left and right most columns out | |
; r1 = left-most pixel of the row, r2 = right-most pixel of the row,
; r5 = start of the left border, r6 = start of the right border.
35 add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1
+ plane_width) | |
36 sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width
- 1 | |
37 sub r5, r1, #32 ; dest_ptr1 = src_ptr1 - Border | |
38 | |
39 mov r12, r4, lsr #2 ; row-loop count = plane_height / 4 (4 rows per iteration) | |
40 | |
; Each iteration handles 4 rows: one edge byte per row and side is loaded
; and broadcast to all 16 lanes of a q register, every pointer stepping
; down one row (plane_stride) per access.
41 copy_left_right_y | |
42 vld1.8 {d0[], d1[]}, [r1], lr | |
43 vld1.8 {d4[], d5[]}, [r2], lr | |
44 vld1.8 {d8[], d9[]}, [r1], lr | |
45 vld1.8 {d12[], d13[]}, [r2], lr | |
46 vld1.8 {d16[], d17[]}, [r1], lr | |
47 vld1.8 {d20[], d21[]}, [r2], lr | |
48 vld1.8 {d24[], d25[]}, [r1], lr | |
49 vld1.8 {d28[], d29[]}, [r2], lr | |
50 | |
; Duplicate each broadcast register so a q-register pair holds the
; 32 identical bytes needed for one border row.
51 vmov q1, q0 | |
52 vmov q3, q2 | |
53 vmov q5, q4 | |
54 vmov q7, q6 | |
55 vmov q9, q8 | |
56 vmov q11, q10 | |
57 vmov q13, q12 | |
58 vmov q15, q14 | |
59 | |
; Flags from this subs are consumed by the bne after the stores
; (the vst1 instructions in between do not alter the flags).
60 subs r12, r12, #1 | |
61 | |
62 vst1.8 {q0, q1}, [r5], lr | |
63 vst1.8 {q2, q3}, [r6], lr | |
64 vst1.8 {q4, q5}, [r5], lr | |
65 vst1.8 {q6, q7}, [r6], lr | |
66 vst1.8 {q8, q9}, [r5], lr | |
67 vst1.8 {q10, q11}, [r6], lr | |
68 vst1.8 {q12, q13}, [r5], lr | |
69 vst1.8 {q14, q15}, [r6], lr | |
70 | |
71 bne copy_left_right_y | |
72 | |
73 ;Now copy the top and bottom source lines into each line of the respective borde
rs | |
74 ldr r1, [r0, #yv12_buffer_config_y_buffer] ; y_buffer | |
75 mul r8, r4, lr ; plane_height * plane_stride | |
76 | |
77 ; copy width is plane_stride | |
; movs sets the flags tested by the ble below; the sub/add instructions in
; between do not update them.
; NOTE(review): ble is taken when r12 == 0, but it also depends on the V flag,
; which movs does not update (V is left over from the last subs above).
78 movs r12, lr, lsr #7 ; plane_stride / 128 | |
79 | |
80 sub r1, r1, #32 ; src_ptr1 = y_buffer - Border | |
81 add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_strid
e (src_ptr1 + (plane_height * plane_stride)) | |
82 sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_heigh
t * plane_stride) - plane_stride | |
83 sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * p
lane_stride) | |
84 ble extra_y_copy_needed ; plane stride < 128 | |
85 | |
; Outer loop: one 128-byte wide column block per iteration. Load 128 bytes
; of the first row (r1) and of the last row (r2); "!" advances both source
; pointers by 128 in total.
86 copy_top_bottom_y | |
87 vld1.8 {q0, q1}, [r1]! | |
88 vld1.8 {q8, q9}, [r2]! | |
89 vld1.8 {q2, q3}, [r1]! | |
90 vld1.8 {q10, q11}, [r2]! | |
91 vld1.8 {q4, q5}, [r1]! | |
92 vld1.8 {q12, q13}, [r2]! | |
93 vld1.8 {q6, q7}, [r1]! | |
94 vld1.8 {q14, q15}, [r2]! | |
95 | |
96 mov r7, #32 ; Border rows to fill above and below | |
97 | |
; Inner loop: write the loaded 128 bytes into one top border row (r5) and
; one bottom border row (r6) per iteration.
98 top_bottom_32 | |
99 subs r7, r7, #1 | |
100 | |
101 vst1.8 {q0, q1}, [r5]! | |
102 vst1.8 {q8, q9}, [r6]! | |
103 vst1.8 {q2, q3}, [r5]! | |
104 vst1.8 {q10, q11}, [r6]! | |
105 vst1.8 {q4, q5}, [r5]! | |
106 vst1.8 {q12, q13}, [r6]! | |
107 vst1.8 {q6, q7}, [r5]! | |
108 vst1.8 {q14, q15}, [r6]! | |
109 | |
; The four writeback stores advanced each pointer by 128; undo that and
; step down one row. add/sub without the s suffix keep the subs flags for bne.
110 add r5, r5, lr ; dest_ptr1 += plane_stride | |
111 sub r5, r5, #128 ; dest_ptr1 -= 128 | |
112 add r6, r6, lr ; dest_ptr2 += plane_stride | |
113 sub r6, r6, #128 ; dest_ptr2 -= 128 | |
114 | |
115 bne top_bottom_32 | |
116 | |
; r1/r2 already point at the next 128-byte block, so re-derive the
; destination pointers for that block from them.
117 sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride) | |
118 add r6, r2, lr ; dest_ptr2 = src_ptr2 + plane_stride | |
119 | |
120 subs r12, r12, #1 | |
121 bne copy_top_bottom_y | |
122 | |
; r7 = (plane_stride >> 4) & 7 = number of 16-byte chunks left over after
; the 128-byte blocks; non-zero means the tail loop at extra_top_bottom_y
; still has work to do. That loop branches back to end_of_border_copy_y.
123 extra_y_copy_needed | |
124 mov r7, lr, lsr #4 ; check to see if extra copy is need
ed | |
125 ands r7, r7, #0x7 | |
126 bne extra_top_bottom_y | |
127 end_of_border_copy_y | |
128 | |
129 ;Border copy for U, V planes | |
130 ; Border = 16 | |
131 ldr r7, [r0, #yv12_buffer_config_u_buffer] ; src_ptr1 | |
132 ldr lr, [r0, #yv12_buffer_config_uv_stride] ; plane_stride | |
133 ldr r3, [r0, #yv12_buffer_config_uv_width] ; plane_width | |
134 ldr r4, [r0, #yv12_buffer_config_uv_height] ; plane_height | |
135 | |
; r10 = number of chroma planes still to process (U on the first pass,
; V on the second; see end_of_border_copy_uv).
136 mov r10, #2 | |
137 | |
138 ;copy the left and right most columns out | |
139 border_copy_uv | |
140 mov r1, r7 ; src_ptr1 needs to be saved for sec
ond half of loop | |
141 sub r5, r1, #16 ; dest_ptr1 = src_ptr1 - Border | |
142 add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1
+ plane_width) | |
143 sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width
- 1 | |
144 | |
145 mov r12, r4, lsr #3 ; row-loop count = plane_height / 8 (8 rows per iteration) | |
146 | |
; Each iteration handles 8 rows: even q registers receive the broadcast
; left edge byte, odd q registers the broadcast right edge byte.
147 copy_left_right_uv | |
148 vld1.8 {d0[], d1[]}, [r1], lr | |
149 vld1.8 {d2[], d3[]}, [r2], lr | |
150 vld1.8 {d4[], d5[]}, [r1], lr | |
151 vld1.8 {d6[], d7[]}, [r2], lr | |
152 vld1.8 {d8[], d9[]}, [r1], lr | |
153 vld1.8 {d10[], d11[]}, [r2], lr | |
154 vld1.8 {d12[], d13[]}, [r1], lr | |
155 vld1.8 {d14[], d15[]}, [r2], lr | |
156 vld1.8 {d16[], d17[]}, [r1], lr | |
157 vld1.8 {d18[], d19[]}, [r2], lr | |
158 vld1.8 {d20[], d21[]}, [r1], lr | |
159 vld1.8 {d22[], d23[]}, [r2], lr | |
160 vld1.8 {d24[], d25[]}, [r1], lr | |
161 vld1.8 {d26[], d27[]}, [r2], lr | |
162 vld1.8 {d28[], d29[]}, [r1], lr | |
163 vld1.8 {d30[], d31[]}, [r2], lr | |
164 | |
; Flags from this subs are consumed by the bne after the stores.
165 subs r12, r12, #1 | |
166 | |
; One q register = the 16 border bytes of one row.
167 vst1.8 {q0}, [r5], lr | |
168 vst1.8 {q1}, [r6], lr | |
169 vst1.8 {q2}, [r5], lr | |
170 vst1.8 {q3}, [r6], lr | |
171 vst1.8 {q4}, [r5], lr | |
172 vst1.8 {q5}, [r6], lr | |
173 vst1.8 {q6}, [r5], lr | |
174 vst1.8 {q7}, [r6], lr | |
175 vst1.8 {q8}, [r5], lr | |
176 vst1.8 {q9}, [r6], lr | |
177 vst1.8 {q10}, [r5], lr | |
178 vst1.8 {q11}, [r6], lr | |
179 vst1.8 {q12}, [r5], lr | |
180 vst1.8 {q13}, [r6], lr | |
181 vst1.8 {q14}, [r5], lr | |
182 vst1.8 {q15}, [r6], lr | |
183 | |
184 bne copy_left_right_uv | |
185 | |
186 ;Now copy the top and bottom source lines into each line of the respective borde
rs | |
; r7 still holds the plane base here; it is reused as a counter below.
187 mov r1, r7 | |
188 mul r8, r4, lr ; plane_height * plane_stride | |
; movs sets the flags tested by the ble below; the sub/add instructions in
; between do not update them.
; NOTE(review): ble also depends on the V flag, which movs does not update.
189 movs r12, lr, lsr #6 ; plane_stride / 64 | |
190 | |
191 sub r1, r1, #16 ; src_ptr1 = u_buffer - Border | |
192 add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_strid
e (src_ptr1 + (plane_height * plane_stride) | |
193 sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_heigh
t * plane_stride) - plane_stride | |
194 sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * p
lane_stride) | |
195 ble extra_uv_copy_needed ; plane_stride < 64 | |
196 | |
; Outer loop: one 64-byte wide column block of the first row (r1) and of
; the last row (r2) per iteration.
197 copy_top_bottom_uv | |
198 vld1.8 {q0, q1}, [r1]! | |
199 vld1.8 {q8, q9}, [r2]! | |
200 vld1.8 {q2, q3}, [r1]! | |
201 vld1.8 {q10, q11}, [r2]! | |
202 | |
203 mov r7, #16 ; Border rows to fill above and below | |
204 | |
; Inner loop: one top border row (r5) and one bottom border row (r6) per
; iteration.
205 top_bottom_16 | |
206 subs r7, r7, #1 | |
207 | |
208 vst1.8 {q0, q1}, [r5]! | |
209 vst1.8 {q8, q9}, [r6]! | |
210 vst1.8 {q2, q3}, [r5]! | |
211 vst1.8 {q10, q11}, [r6]! | |
212 | |
; Undo the 64 bytes of store writeback and step down one row.
213 add r5, r5, lr ; dest_ptr1 += plane_stride | |
214 sub r5, r5, #64 | |
215 add r6, r6, lr ; dest_ptr2 += plane_stride | |
216 sub r6, r6, #64 | |
217 | |
218 bne top_bottom_16 | |
219 | |
220 sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * p
lane_stride) | |
221 add r6, r2, lr ; dest_ptr2 = src_ptr2 + plane_strid
e | |
222 | |
223 subs r12, r12, #1 | |
224 bne copy_top_bottom_uv | |
; r7 = (plane_stride >> 3) & 7 = number of 8-byte chunks left over after
; the 64-byte blocks; non-zero means the tail loop at extra_top_bottom_uv
; still has work to do. That loop branches back to end_of_border_copy_uv.
225 extra_uv_copy_needed | |
226 mov r7, lr, lsr #3 ; check to see if extra copy is need
ed | |
227 ands r7, r7, #0x7 | |
228 bne extra_top_bottom_uv | |
229 | |
; After the first (U) pass reload r7 with the V plane base and repeat;
; after the second pass fall through to the epilogue.
230 end_of_border_copy_uv | |
231 subs r10, r10, #1 | |
232 ldrne r7, [r0, #yv12_buffer_config_v_buffer] ; src_ptr1 | |
233 bne border_copy_uv | |
234 | |
; Restore the registers saved on entry; popping pc returns to the caller.
235 vpop {d8 - d15} | |
236 pop {r4 - r10, pc} | |
237 | |
238 ;;;;;;;;;;;;;;;;;;;;;; | |
; Tail of the Y top/bottom copy: r7 = remaining 16-byte chunks. Each pass
; loads one 16-byte chunk of the first row (r1) and of the last row (r2)
; and writes it into all 32 border rows (4 iterations of 8 rows each).
239 extra_top_bottom_y | |
240 vld1.8 {q0}, [r1]! | |
241 vld1.8 {q2}, [r2]! | |
242 | |
243 mov r9, #4 ; 32 >> 3 | |
244 | |
245 extra_top_bottom_32 | |
246 subs r9, r9, #1 | |
247 | |
248 vst1.8 {q0}, [r5], lr | |
249 vst1.8 {q2}, [r6], lr | |
250 vst1.8 {q0}, [r5], lr | |
251 vst1.8 {q2}, [r6], lr | |
252 vst1.8 {q0}, [r5], lr | |
253 vst1.8 {q2}, [r6], lr | |
254 vst1.8 {q0}, [r5], lr | |
255 vst1.8 {q2}, [r6], lr | |
256 vst1.8 {q0}, [r5], lr | |
257 vst1.8 {q2}, [r6], lr | |
258 vst1.8 {q0}, [r5], lr | |
259 vst1.8 {q2}, [r6], lr | |
260 vst1.8 {q0}, [r5], lr | |
261 vst1.8 {q2}, [r6], lr | |
262 vst1.8 {q0}, [r5], lr | |
263 vst1.8 {q2}, [r6], lr | |
264 bne extra_top_bottom_32 | |
265 | |
; r1/r2 were advanced by 16 by the loads; re-derive the destinations for
; the next chunk from them.
266 sub r5, r1, lr, asl #5 ; src_ptr1 - (Border * plane_stride) | |
267 add r6, r2, lr ; src_ptr2 + plane_stride | |
268 subs r7, r7, #1 | |
269 bne extra_top_bottom_y | |
270 | |
271 b end_of_border_copy_y | |
272 | |
; Tail of the U/V top/bottom copy: r7 = remaining 8-byte chunks. Each pass
; loads one 8-byte chunk of the first row (r1) and of the last row (r2)
; and writes it into all 16 border rows (2 iterations of 8 rows each).
273 extra_top_bottom_uv | |
274 vld1.8 {d0}, [r1]! | |
275 vld1.8 {d8}, [r2]! | |
276 | |
277 mov r9, #2 ; 16 >> 3 | |
278 | |
279 extra_top_bottom_16 | |
280 subs r9, r9, #1 | |
281 | |
282 vst1.8 {d0}, [r5], lr | |
283 vst1.8 {d8}, [r6], lr | |
284 vst1.8 {d0}, [r5], lr | |
285 vst1.8 {d8}, [r6], lr | |
286 vst1.8 {d0}, [r5], lr | |
287 vst1.8 {d8}, [r6], lr | |
288 vst1.8 {d0}, [r5], lr | |
289 vst1.8 {d8}, [r6], lr | |
290 vst1.8 {d0}, [r5], lr | |
291 vst1.8 {d8}, [r6], lr | |
292 vst1.8 {d0}, [r5], lr | |
293 vst1.8 {d8}, [r6], lr | |
294 vst1.8 {d0}, [r5], lr | |
295 vst1.8 {d8}, [r6], lr | |
296 vst1.8 {d0}, [r5], lr | |
297 vst1.8 {d8}, [r6], lr | |
298 bne extra_top_bottom_16 | |
299 | |
; r1/r2 were advanced by 8 by the loads; re-derive the destinations for
; the next chunk from them.
300 sub r5, r1, lr, asl #4 ; src_ptr1 - (Border * plane_stride) | |
301 add r6, r2, lr ; src_ptr2 + plane_stride | |
302 subs r7, r7, #1 | |
303 bne extra_top_bottom_uv | |
304 | |
305 b end_of_border_copy_uv | |
306 | |
307 ENDP | |
308 END | |
OLD | NEW |