Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(139)

Side by Side Diff: source/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm

Issue 478033002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 EXPORT |vp8_yv12_copy_src_frame_func_neon|
13 ARM
14 REQUIRE8
15 PRESERVE8
16
17 INCLUDE vpx_scale_asm_offsets.asm
18
19 AREA ||.text||, CODE, READONLY, ALIGN=2
20 ;Note: This function is used to copy source data in src_buffer[i] at beginning
21 ;of the encoding. The buffer has a width and height of cpi->oxcf.Width and
22 ;cpi->oxcf.Height, which can be ANY numbers(NOT always multiples of 16 or 4).
23
24 ;void vp8_yv12_copy_src_frame_func_neon(const YV12_BUFFER_CONFIG *src_ybc,
25 ; YV12_BUFFER_CONFIG *dst_ybc);
26
;-----------------------------------------------------------------------
; void vp8_yv12_copy_src_frame_func_neon(const YV12_BUFFER_CONFIG *src_ybc,
;                                        YV12_BUFFER_CONFIG *dst_ybc);
;
; Copies the Y plane, then the U plane, then the V plane from src_ybc to
; dst_ybc. Plane width and height are read from src_ybc only; each buffer
; uses its own stride. Rows are copied two at a time, followed by one
; single row when the plane height is odd.
;
; In:     r0 = src_ybc, r1 = dst_ybc
; Saved:  r4-r11, lr and d8-d15 are pushed on entry and popped on exit
;         (q4-q7, i.e. d8-d15, are used as copy registers below).
;
; Register roles:
;   r2  / r3   source / destination pointer, first row of the pair
;   r10 / r11  source / destination pointer, second row of the pair
;   r4         plane height          r5   plane width (bytes per row)
;   r6  / r7   2 * stride - width    (source / destination row-pair step)
;   r8         byte scratch          r9   chroma planes still to copy
;   r12        bytes left in row     lr   row pairs left
;
; Every chunk loop (128/64 bytes, 8 bytes, 1 byte) and the row-pair loop
; is guarded by a test *before* its first iteration, so a width smaller
; than a chunk, a remainder of 1..7 bytes, a zero width or a height below
; two never runs a loop body it has no data for. The loops test their
; counters with unsigned compares, so running a body with too few bytes
; left would wrap the counter and keep copying.
;-----------------------------------------------------------------------
|vp8_yv12_copy_src_frame_func_neon| PROC
    push            {r4 - r11, lr}
    vpush           {d8 - d15}

    ;Copy Y plane
    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             r5, [r0, #yv12_buffer_config_y_width]
    ldr             r6, [r0, #yv12_buffer_config_y_stride]
    ldr             r7, [r1, #yv12_buffer_config_y_stride]
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1

    add             r10, r2, r6             ;second row src
    add             r11, r3, r7             ;second row dst
    mov             r6, r6, lsl #1
    mov             r7, r7, lsl #1
    sub             r6, r6, r5              ;adjust stride: 2*stride - width
    sub             r7, r7, r5

    ; copy two rows at one time
    movs            lr, r4, lsr #1          ;lr = number of row pairs
    beq             cp_y_odd_row            ;height < 2: no full pair to copy

cp_src_to_dst_height_loop
    mov             r12, r5                 ;r12 = bytes left in this row pair

    cmp             r12, #128
    blo             cp_width_8_check        ;fewer than 128 bytes: skip wide loop

cp_width_128_loop
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q4, q5}, [r10]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q6, q7}, [r10]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q12, q13}, [r10]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q14, q15}, [r10]!
    sub             r12, r12, #128
    cmp             r12, #128               ;flags consumed by bhs below
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q4, q5}, [r11]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q6, q7}, [r11]!
    vst1.8          {q8, q9}, [r3]!
    vst1.8          {q12, q13}, [r11]!
    vst1.8          {q10, q11}, [r3]!
    vst1.8          {q14, q15}, [r11]!
    bhs             cp_width_128_loop

cp_width_8_check
    cmp             r12, #8
    blo             cp_width_1_check        ;fewer than 8 bytes: skip 8-byte loop

cp_width_8_loop
    vld1.8          {d0}, [r2]!
    vld1.8          {d1}, [r10]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    vst1.8          {d1}, [r11]!
    bhs             cp_width_8_loop

cp_width_1_check
    cmp             r12, #0
    beq             cp_width_done

cp_width_1_loop
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    ldrb            r8, [r10], #1
    strb            r8, [r11], #1
    bne             cp_width_1_loop

cp_width_done
    subs            lr, lr, #1
    add             r2, r2, r6              ;step all four pointers two rows down
    add             r3, r3, r7
    add             r10, r10, r6
    add             r11, r11, r7
    bne             cp_src_to_dst_height_loop

cp_y_odd_row
    ;copy last line for Y if y_height is odd
    tst             r4, #1
    beq             cp_width_done_1
    mov             r12, r5

    cmp             r12, #128
    blo             cp_width_8_check_1

cp_width_128_loop_1
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q10, q11}, [r2]!
    sub             r12, r12, #128
    cmp             r12, #128
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q8, q9}, [r3]!
    vst1.8          {q10, q11}, [r3]!
    bhs             cp_width_128_loop_1

cp_width_8_check_1
    cmp             r12, #8
    blo             cp_width_1_check_1

cp_width_8_loop_1
    vld1.8          {d0}, [r2]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    bhs             cp_width_8_loop_1

cp_width_1_check_1
    cmp             r12, #0
    beq             cp_width_done_1

cp_width_1_loop_1
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    bne             cp_width_1_loop_1
cp_width_done_1

    ;Copy U & V planes
    ldr             r4, [r0, #yv12_buffer_config_uv_height]
    ldr             r5, [r0, #yv12_buffer_config_uv_width]
    ldr             r6, [r0, #yv12_buffer_config_uv_stride]
    ldr             r7, [r1, #yv12_buffer_config_uv_stride]
    ldr             r2, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_u_buffer]       ;dstptr1

    add             r10, r2, r6             ;second row src
    add             r11, r3, r7             ;second row dst
    mov             r6, r6, lsl #1
    mov             r7, r7, lsl #1
    sub             r6, r6, r5              ;adjust stride: 2*stride - width
    sub             r7, r7, r5

    mov             r9, #2                  ;two chroma planes: U first, then V

cp_uv_loop
    ;copy two rows at one time
    movs            lr, r4, lsr #1          ;lr = number of row pairs
    beq             cp_uv_odd_row           ;height < 2: no full pair to copy

cp_src_to_dst_height_uv_loop
    mov             r12, r5                 ;r12 = bytes left in this row pair

    cmp             r12, #64
    blo             cp_width_uv_8_check     ;fewer than 64 bytes: skip wide loop

cp_width_uv_64_loop
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q4, q5}, [r10]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q6, q7}, [r10]!
    sub             r12, r12, #64
    cmp             r12, #64                ;flags consumed by bhs below
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q4, q5}, [r11]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q6, q7}, [r11]!
    bhs             cp_width_uv_64_loop

cp_width_uv_8_check
    cmp             r12, #8
    blo             cp_width_uv_1_check     ;fewer than 8 bytes: skip 8-byte loop

cp_width_uv_8_loop
    vld1.8          {d0}, [r2]!
    vld1.8          {d1}, [r10]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    vst1.8          {d1}, [r11]!
    bhs             cp_width_uv_8_loop

cp_width_uv_1_check
    cmp             r12, #0
    beq             cp_width_uv_done

cp_width_uv_1_loop
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    ldrb            r8, [r10], #1
    strb            r8, [r11], #1
    bne             cp_width_uv_1_loop

cp_width_uv_done
    subs            lr, lr, #1
    add             r2, r2, r6              ;step all four pointers two rows down
    add             r3, r3, r7
    add             r10, r10, r6
    add             r11, r11, r7
    bne             cp_src_to_dst_height_uv_loop

cp_uv_odd_row
    ;copy last line for U & V if uv_height is odd
    tst             r4, #1
    beq             cp_width_uv_done_1
    mov             r12, r5

    cmp             r12, #64
    blo             cp_width_uv_8_check_1

cp_width_uv_64_loop_1
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q2, q3}, [r2]!
    sub             r12, r12, #64
    cmp             r12, #64
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q2, q3}, [r3]!
    bhs             cp_width_uv_64_loop_1

cp_width_uv_8_check_1
    cmp             r12, #8
    blo             cp_width_uv_1_check_1

cp_width_uv_8_loop_1
    vld1.8          {d0}, [r2]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    bhs             cp_width_uv_8_loop_1

cp_width_uv_1_check_1
    cmp             r12, #0
    beq             cp_width_uv_done_1

cp_width_uv_1_loop_1
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    bne             cp_width_uv_1_loop_1
cp_width_uv_done_1

    ;After U, reload the pointers for V and go round once more. The
    ;conditional loads/adds do not alter the flags set by subs, so the
    ;final bne still tests the plane counter. r4-r7 (height, width and
    ;row-pair steps) are shared by both chroma planes and are kept.
    subs            r9, r9, #1
    ldrne           r2, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
    ldrne           r3, [r1, #yv12_buffer_config_v_buffer]      ;dstptr1
    ldrne           r10, [r0, #yv12_buffer_config_uv_stride]
    ldrne           r11, [r1, #yv12_buffer_config_uv_stride]

    addne           r10, r2, r10            ;second row src
    addne           r11, r3, r11            ;second row dst

    bne             cp_uv_loop

    vpop            {d8 - d15}
    pop             {r4 - r11, pc}

    ENDP
259 END
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698