OLD | NEW |
| (Empty) |
1 ; | |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
3 ; | |
4 ; Use of this source code is governed by a BSD-style license | |
5 ; that can be found in the LICENSE file in the root of the source | |
6 ; tree. An additional intellectual property rights grant can be found | |
7 ; in the file PATENTS. All contributing project authors may | |
8 ; be found in the AUTHORS file in the root of the source tree. | |
9 ; | |
10 | |
11 | |
12 EXPORT |vp8_yv12_copy_src_frame_func_neon| | |
13 ARM | |
14 REQUIRE8 | |
15 PRESERVE8 | |
16 | |
17 INCLUDE vpx_scale_asm_offsets.asm | |
18 | |
19 AREA ||.text||, CODE, READONLY, ALIGN=2 | |
;-----------------------------------------------------------------------
; void vp8_yv12_copy_src_frame_func_neon(const YV12_BUFFER_CONFIG *src_ybc,
;                                        YV12_BUFFER_CONFIG *dst_ybc);
;
; Copies the Y, U and V planes of src_ybc into dst_ybc, row by row,
; using the width/height of the source and the stride of each buffer.
;
; Note: this function is used to copy source data into src_buffer[i] at
; the beginning of encoding. The buffer has a width and height of
; cpi->oxcf.Width and cpi->oxcf.Height, which can be ANY numbers (NOT
; always multiples of 16 or 4). Every copy stage below is therefore
; entered only when enough bytes/rows remain for it, so exactly `width`
; bytes are copied per row for any width >= 0 and any height >= 0.
;
; In:    r0 = src_ybc, r1 = dst_ybc
; Out:   none
; Saved: r4-r11, lr and d8-d15 are pushed on entry and restored on exit
;        (q4-q7 are used as copy registers).
;
; Register roles inside the function:
;   r2  / r3   src / dst pointer, first row of the current row pair
;   r10 / r11  src / dst pointer, second row of the current row pair
;   r4         plane height            r5   plane width (bytes per row)
;   r6  / r7   src / dst (2 * stride - width): step from the end of one
;              copied row to the start of the row two lines further down
;   r8         scratch for byte copies
;   r9         number of chroma planes still to copy (U, then V)
;   r12        bytes left in the current row, biased by the chunk size
;   lr         row pairs left in the current plane
;-----------------------------------------------------------------------

|vp8_yv12_copy_src_frame_func_neon| PROC
    push            {r4 - r11, lr}
    vpush           {d8 - d15}

    ;Copy Y plane
    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             r5, [r0, #yv12_buffer_config_y_width]
    ldr             r6, [r0, #yv12_buffer_config_y_stride]
    ldr             r7, [r1, #yv12_buffer_config_y_stride]
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1

    add             r10, r2, r6             ;second row src
    add             r11, r3, r7             ;second row dst
    mov             r6, r6, lsl #1
    mov             r7, r7, lsl #1
    sub             r6, r6, r5              ;adjust stride
    sub             r7, r7, r5

    ; copy two rows at one time; skip the pair loop when height < 2
    movs            lr, r4, lsr #1
    beq             cp_height_pairs_done

cp_src_to_dst_height_loop
    subs            r12, r5, #128           ;r12 = width - 128
    blt             cp_width_lt_128         ;row shorter than one 128-byte chunk

    ; 128 bytes of both rows per iteration. NEON loads/stores leave the
    ; flags alone, so the subs result is still valid at the bge.
cp_width_128_loop
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q4, q5}, [r10]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q6, q7}, [r10]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q12, q13}, [r10]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q14, q15}, [r10]!
    subs            r12, r12, #128          ;>= 0 while another full chunk remains
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q4, q5}, [r11]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q6, q7}, [r11]!
    vst1.8          {q8, q9}, [r3]!
    vst1.8          {q12, q13}, [r11]!
    vst1.8          {q10, q11}, [r3]!
    vst1.8          {q14, q15}, [r11]!
    bge             cp_width_128_loop

cp_width_lt_128
    adds            r12, r12, #120          ;r12 = bytes left - 8
    blt             cp_width_lt_8           ;fewer than 8 bytes left

cp_width_8_loop
    vld1.8          {d0}, [r2]!
    vld1.8          {d1}, [r10]!
    subs            r12, r12, #8
    vst1.8          {d0}, [r3]!
    vst1.8          {d1}, [r11]!
    bge             cp_width_8_loop

cp_width_lt_8
    adds            r12, r12, #8            ;r12 = bytes left (0..7)
    ble             cp_width_done           ;nothing left (or width <= 0)

cp_width_1_loop
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    ldrb            r8, [r10], #1
    strb            r8, [r11], #1
    bne             cp_width_1_loop

cp_width_done
    subs            lr, lr, #1
    add             r2, r2, r6              ;step all four pointers to the next row pair
    add             r3, r3, r7
    add             r10, r10, r6
    add             r11, r11, r7
    bne             cp_src_to_dst_height_loop

cp_height_pairs_done
    ;copy last line for Y if y_height is odd (r2/r3 already point at it)
    tst             r4, #1
    beq             cp_width_done_1

    subs            r12, r5, #128           ;r12 = width - 128
    blt             cp_width_lt_128_1

cp_width_128_loop_1
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q10, q11}, [r2]!
    subs            r12, r12, #128
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q8, q9}, [r3]!
    vst1.8          {q10, q11}, [r3]!
    bge             cp_width_128_loop_1

cp_width_lt_128_1
    adds            r12, r12, #120          ;r12 = bytes left - 8
    blt             cp_width_lt_8_1

cp_width_8_loop_1
    vld1.8          {d0}, [r2]!
    subs            r12, r12, #8
    vst1.8          {d0}, [r3]!
    bge             cp_width_8_loop_1

cp_width_lt_8_1
    adds            r12, r12, #8            ;r12 = bytes left (0..7)
    ble             cp_width_done_1

cp_width_1_loop_1
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    bne             cp_width_1_loop_1
cp_width_done_1

    ;Copy U & V planes
    ldr             r4, [r0, #yv12_buffer_config_uv_height]
    ldr             r5, [r0, #yv12_buffer_config_uv_width]
    ldr             r6, [r0, #yv12_buffer_config_uv_stride]
    ldr             r7, [r1, #yv12_buffer_config_uv_stride]
    ldr             r2, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_u_buffer]       ;dstptr1

    add             r10, r2, r6             ;second row src
    add             r11, r3, r7             ;second row dst
    mov             r6, r6, lsl #1
    mov             r7, r7, lsl #1
    sub             r6, r6, r5              ;adjust stride
    sub             r7, r7, r5

    mov             r9, #2                  ;two chroma planes: U first, then V

cp_uv_loop
    ;copy two rows at one time; skip the pair loop when uv_height < 2
    movs            lr, r4, lsr #1
    beq             cp_height_pairs_uv_done

cp_src_to_dst_height_uv_loop
    subs            r12, r5, #64            ;r12 = width - 64
    blt             cp_width_uv_lt_64       ;row shorter than one 64-byte chunk

cp_width_uv_64_loop
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q4, q5}, [r10]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q6, q7}, [r10]!
    subs            r12, r12, #64           ;>= 0 while another full chunk remains
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q4, q5}, [r11]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q6, q7}, [r11]!
    bge             cp_width_uv_64_loop

cp_width_uv_lt_64
    adds            r12, r12, #56           ;r12 = bytes left - 8
    blt             cp_width_uv_lt_8

cp_width_uv_8_loop
    vld1.8          {d0}, [r2]!
    vld1.8          {d1}, [r10]!
    subs            r12, r12, #8
    vst1.8          {d0}, [r3]!
    vst1.8          {d1}, [r11]!
    bge             cp_width_uv_8_loop

cp_width_uv_lt_8
    adds            r12, r12, #8            ;r12 = bytes left (0..7)
    ble             cp_width_uv_done

cp_width_uv_1_loop
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    ldrb            r8, [r10], #1
    strb            r8, [r11], #1
    bne             cp_width_uv_1_loop

cp_width_uv_done
    subs            lr, lr, #1
    add             r2, r2, r6
    add             r3, r3, r7
    add             r10, r10, r6
    add             r11, r11, r7
    bne             cp_src_to_dst_height_uv_loop

cp_height_pairs_uv_done
    ;copy last line for U & V if uv_height is odd
    tst             r4, #1
    beq             cp_width_uv_done_1

    subs            r12, r5, #64            ;r12 = width - 64
    blt             cp_width_uv_lt_64_1

cp_width_uv_64_loop_1
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q2, q3}, [r2]!
    subs            r12, r12, #64
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q2, q3}, [r3]!
    bge             cp_width_uv_64_loop_1

cp_width_uv_lt_64_1
    adds            r12, r12, #56           ;r12 = bytes left - 8
    blt             cp_width_uv_lt_8_1

cp_width_uv_8_loop_1
    vld1.8          {d0}, [r2]!
    subs            r12, r12, #8
    vst1.8          {d0}, [r3]!
    bge             cp_width_uv_8_loop_1

cp_width_uv_lt_8_1
    adds            r12, r12, #8            ;r12 = bytes left (0..7)
    ble             cp_width_uv_done_1

cp_width_uv_1_loop_1
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    bne             cp_width_uv_1_loop_1
cp_width_uv_done_1

    ; After U, reload the pointers for V and go round once more. The
    ; conditional loads/adds do not touch the flags, so the final bne
    ; still tests the subs on r9. r4-r7 (height, width, adjusted
    ; strides) are shared by both chroma planes and are kept as they are.
    subs            r9, r9, #1
    ldrne           r2, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
    ldrne           r3, [r1, #yv12_buffer_config_v_buffer]      ;dstptr1
    ldrne           r10, [r0, #yv12_buffer_config_uv_stride]
    ldrne           r11, [r1, #yv12_buffer_config_uv_stride]

    addne           r10, r2, r10                ;second row src
    addne           r11, r3, r11                ;second row dst

    bne             cp_uv_loop

    vpop            {d8 - d15}
    pop             {r4 - r11, pc}

    ENDP
259 END | |
OLD | NEW |