Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Side by Side Diff: source/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm

Issue 478033002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 EXPORT |vp8_yv12_extend_frame_borders_neon|
13 ARM
14 REQUIRE8
15 PRESERVE8
16
17 INCLUDE vpx_scale_asm_offsets.asm
18
19 AREA ||.text||, CODE, READONLY, ALIGN=2
20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);
21 ; we depend on VP8BORDERINPIXELS being 32
;-----------------------------------------------------------------------
; Replicates the edge pixels of the Y, U and V planes into the border
; area that surrounds each plane.
;
; In:    r0 = ybf; fields are read through the yv12_buffer_config_*
;             offsets (presumably supplied by vpx_scale_asm_offsets.asm).
; Saved: r4-r10, lr and d8-d15 are pushed on entry and popped on exit.
;
; Per plane the work is done in two passes:
;   1. left/right : for every row, the first pixel is replicated into
;                   the Border bytes before the row and the last pixel
;                   into the Border bytes after it.
;   2. top/bottom : the (already extended) first row is copied into the
;                   Border rows above the plane, the last row into the
;                   Border rows below it.
; Border is 32 for Y and 16 for U/V.
;
; Register roles inside each pass:
;   r1 = src_ptr1 (left column / top row)    r5 = dest_ptr1
;   r2 = src_ptr2 (right column / bottom row) r6 = dest_ptr2
;   r3 = plane_width, r4 = plane_height, lr = plane_stride
;   r12 = outer loop counter, r7/r9 = inner counters, r10 = U/V pass count
;
; NOTE(review): the loop counts are derived with truncating shifts and
; the loops are "subs / bne" without a zero guard. The code therefore
; appears to assume plane_height is a non-zero multiple of 4 (Y) or
; 8 (U/V), and that plane_stride is a multiple of 16 (Y) or 8 (U/V).
; Confirm against the buffer allocator.
;-----------------------------------------------------------------------
22
23 |vp8_yv12_extend_frame_borders_neon| PROC
24 push {r4 - r10, lr}
25 vpush {d8 - d15}
26
27 ; Border = 32
28 ldr r3, [r0, #yv12_buffer_config_y_width] ; plane_width
29 ldr r1, [r0, #yv12_buffer_config_y_buffer] ; src_ptr1
30 ldr r4, [r0, #yv12_buffer_config_y_height] ; plane_height
31 ldr lr, [r0, #yv12_buffer_config_y_stride] ; plane_stride
32
33 ; Border copy for Y plane
34 ; copy the left and right most columns out
35 add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width)
36 sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width - 1
37 sub r5, r1, #32 ; dest_ptr1 = src_ptr1 - Border
38
39 mov r12, r4, lsr #2 ; plane_height / 4 (four rows are handled per iteration)
40
; Each iteration handles four rows. For each row one byte is loaded from
; the left edge (r1) and one from the right edge (r2) and replicated to
; all lanes of a q register; the vmov block duplicates it into a second
; q register so that 32 identical bytes can be stored per row and side.
41 copy_left_right_y
42 vld1.8 {d0[], d1[]}, [r1], lr
43 vld1.8 {d4[], d5[]}, [r2], lr
44 vld1.8 {d8[], d9[]}, [r1], lr
45 vld1.8 {d12[], d13[]}, [r2], lr
46 vld1.8 {d16[], d17[]}, [r1], lr
47 vld1.8 {d20[], d21[]}, [r2], lr
48 vld1.8 {d24[], d25[]}, [r1], lr
49 vld1.8 {d28[], d29[]}, [r2], lr
50
51 vmov q1, q0
52 vmov q3, q2
53 vmov q5, q4
54 vmov q7, q6
55 vmov q9, q8
56 vmov q11, q10
57 vmov q13, q12
58 vmov q15, q14
59
60 subs r12, r12, #1 ; sets the flags for the bne below; the vst1 stores in between do not modify them
61
62 vst1.8 {q0, q1}, [r5], lr
63 vst1.8 {q2, q3}, [r6], lr
64 vst1.8 {q4, q5}, [r5], lr
65 vst1.8 {q6, q7}, [r6], lr
66 vst1.8 {q8, q9}, [r5], lr
67 vst1.8 {q10, q11}, [r6], lr
68 vst1.8 {q12, q13}, [r5], lr
69 vst1.8 {q14, q15}, [r6], lr
70
71 bne copy_left_right_y
72
73 ;Now copy the top and bottom source lines into each line of the respective borders
74 ldr r1, [r0, #yv12_buffer_config_y_buffer] ; y_buffer
75 mul r8, r4, lr ; plane_height * plane_stride
76
77 ; copy width is plane_stride
78 movs r12, lr, lsr #7 ; plane_stride / 128 (number of 128-byte wide strips); sets Z when the result is 0
79
80 sub r1, r1, #32 ; src_ptr1 = y_buffer - Border
81 add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride))
82 sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
83 sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
84 ble extra_y_copy_needed ; plane stride < 128
85
; Outer loop: one 128-byte wide strip per iteration. 128 bytes of the top
; row (r1) and of the bottom row (r2) are loaded once, then the inner
; loop writes them to each of the 32 border rows above and below.
86 copy_top_bottom_y
87 vld1.8 {q0, q1}, [r1]!
88 vld1.8 {q8, q9}, [r2]!
89 vld1.8 {q2, q3}, [r1]!
90 vld1.8 {q10, q11}, [r2]!
91 vld1.8 {q4, q5}, [r1]!
92 vld1.8 {q12, q13}, [r2]!
93 vld1.8 {q6, q7}, [r1]!
94 vld1.8 {q14, q15}, [r2]!
95
96 mov r7, #32 ; Border
97
98 top_bottom_32
99 subs r7, r7, #1
100
101 vst1.8 {q0, q1}, [r5]!
102 vst1.8 {q8, q9}, [r6]!
103 vst1.8 {q2, q3}, [r5]!
104 vst1.8 {q10, q11}, [r6]!
105 vst1.8 {q4, q5}, [r5]!
106 vst1.8 {q12, q13}, [r6]!
107 vst1.8 {q6, q7}, [r5]!
108 vst1.8 {q14, q15}, [r6]!
109
110 add r5, r5, lr ; dest_ptr1 += plane_stride
111 sub r5, r5, #128 ; dest_ptr1 -= 128
112 add r6, r6, lr ; dest_ptr2 += plane_stride
113 sub r6, r6, #128 ; dest_ptr2 -= 128
114
115 bne top_bottom_32
116
; r1 and r2 have advanced by 128 bytes, so the destinations computed
; below point at the top of the next strip.
117 sub r5, r1, lr, asl #5 ; src_ptr1 - (Border* plane_stride)
118 add r6, r2, lr ; src_ptr2 + plane_stride
119
120 subs r12, r12, #1
121 bne copy_top_bottom_y
122
; r7 = number of 16-byte columns left over after the 128-byte strips.
; NOTE(review): bytes beyond a multiple of 16 in plane_stride are not
; copied by either path.
123 extra_y_copy_needed
124 mov r7, lr, lsr #4 ; check to see if extra copy is needed
125 ands r7, r7, #0x7
126 bne extra_top_bottom_y
127 end_of_border_copy_y
128
129 ;Border copy for U, V planes
130 ; Border = 16
131 ldr r7, [r0, #yv12_buffer_config_u_buffer] ; src_ptr1
132 ldr lr, [r0, #yv12_buffer_config_uv_stride] ; plane_stride
133 ldr r3, [r0, #yv12_buffer_config_uv_width] ; plane_width
134 ldr r4, [r0, #yv12_buffer_config_uv_height] ; plane_height
135
136 mov r10, #2 ; two passes through border_copy_uv: u_buffer first, then v_buffer
137
138 ;copy the left and right most columns out
139 border_copy_uv
140 mov r1, r7 ; src_ptr1 needs to be saved for second half of loop
141 sub r5, r1, #16 ; dest_ptr1 = src_ptr1 - Border
142 add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width)
143 sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width - 1
144
145 mov r12, r4, lsr #3 ; plane_height / 8 (eight rows are handled per iteration)
146
; Each iteration handles eight rows. Even q registers hold the replicated
; left-edge byte of a row, odd q registers the replicated right-edge byte;
; one q register (16 bytes) covers the whole chroma border.
147 copy_left_right_uv
148 vld1.8 {d0[], d1[]}, [r1], lr
149 vld1.8 {d2[], d3[]}, [r2], lr
150 vld1.8 {d4[], d5[]}, [r1], lr
151 vld1.8 {d6[], d7[]}, [r2], lr
152 vld1.8 {d8[], d9[]}, [r1], lr
153 vld1.8 {d10[], d11[]}, [r2], lr
154 vld1.8 {d12[], d13[]}, [r1], lr
155 vld1.8 {d14[], d15[]}, [r2], lr
156 vld1.8 {d16[], d17[]}, [r1], lr
157 vld1.8 {d18[], d19[]}, [r2], lr
158 vld1.8 {d20[], d21[]}, [r1], lr
159 vld1.8 {d22[], d23[]}, [r2], lr
160 vld1.8 {d24[], d25[]}, [r1], lr
161 vld1.8 {d26[], d27[]}, [r2], lr
162 vld1.8 {d28[], d29[]}, [r1], lr
163 vld1.8 {d30[], d31[]}, [r2], lr
164
165 subs r12, r12, #1 ; sets the flags for the bne below; the vst1 stores in between do not modify them
166
167 vst1.8 {q0}, [r5], lr
168 vst1.8 {q1}, [r6], lr
169 vst1.8 {q2}, [r5], lr
170 vst1.8 {q3}, [r6], lr
171 vst1.8 {q4}, [r5], lr
172 vst1.8 {q5}, [r6], lr
173 vst1.8 {q6}, [r5], lr
174 vst1.8 {q7}, [r6], lr
175 vst1.8 {q8}, [r5], lr
176 vst1.8 {q9}, [r6], lr
177 vst1.8 {q10}, [r5], lr
178 vst1.8 {q11}, [r6], lr
179 vst1.8 {q12}, [r5], lr
180 vst1.8 {q13}, [r6], lr
181 vst1.8 {q14}, [r5], lr
182 vst1.8 {q15}, [r6], lr
183
184 bne copy_left_right_uv
185
186 ;Now copy the top and bottom source lines into each line of the respective borders
187 mov r1, r7 ; restore src_ptr1; r7 is reused as a counter from here on
188 mul r8, r4, lr ; plane_height * plane_stride
189 movs r12, lr, lsr #6 ; plane_stride / 64 (number of 64-byte wide strips); sets Z when the result is 0
190
191 sub r1, r1, #16 ; src_ptr1 = u_buffer - Border
192 add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride))
193 sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
194 sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
195 ble extra_uv_copy_needed ; plane_stride < 64
196
; Outer loop: one 64-byte wide strip per iteration. 64 bytes of the top
; row (r1) and of the bottom row (r2) are loaded once, then the inner
; loop writes them to each of the 16 border rows above and below.
197 copy_top_bottom_uv
198 vld1.8 {q0, q1}, [r1]!
199 vld1.8 {q8, q9}, [r2]!
200 vld1.8 {q2, q3}, [r1]!
201 vld1.8 {q10, q11}, [r2]!
202
203 mov r7, #16 ; Border
204
205 top_bottom_16
206 subs r7, r7, #1
207
208 vst1.8 {q0, q1}, [r5]!
209 vst1.8 {q8, q9}, [r6]!
210 vst1.8 {q2, q3}, [r5]!
211 vst1.8 {q10, q11}, [r6]!
212
213 add r5, r5, lr ; dest_ptr1 += plane_stride
214 sub r5, r5, #64 ; dest_ptr1 -= 64
215 add r6, r6, lr ; dest_ptr2 += plane_stride
216 sub r6, r6, #64 ; dest_ptr2 -= 64
217
218 bne top_bottom_16
219
; r1 and r2 have advanced by 64 bytes, so the destinations computed
; below point at the top of the next strip.
220 sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
221 add r6, r2, lr ; dest_ptr2 = src_ptr2 + plane_stride
222
223 subs r12, r12, #1
224 bne copy_top_bottom_uv
; r7 = number of 8-byte columns left over after the 64-byte strips.
225 extra_uv_copy_needed
226 mov r7, lr, lsr #3 ; check to see if extra copy is needed
227 ands r7, r7, #0x7
228 bne extra_top_bottom_uv
229
230 end_of_border_copy_uv
231 subs r10, r10, #1
232 ldrne r7, [r0, #yv12_buffer_config_v_buffer] ; src_ptr1
233 bne border_copy_uv
234
235 vpop {d8 - d15}
236 pop {r4 - r10, pc}
237
238 ;;;;;;;;;;;;;;;;;;;;;;
; Out-of-line tail for the Y plane, entered with r7 = number of leftover
; 16-byte columns. Each pass of extra_top_bottom_y copies one 16-byte
; column of the top row (q0) and bottom row (q2) into all 32 border rows
; (4 iterations x 8 unrolled row stores), then moves on to the next column.
239 extra_top_bottom_y
240 vld1.8 {q0}, [r1]!
241 vld1.8 {q2}, [r2]!
242
243 mov r9, #4 ; 32 >> 3
244
245 extra_top_bottom_32
246 subs r9, r9, #1
247
248 vst1.8 {q0}, [r5], lr
249 vst1.8 {q2}, [r6], lr
250 vst1.8 {q0}, [r5], lr
251 vst1.8 {q2}, [r6], lr
252 vst1.8 {q0}, [r5], lr
253 vst1.8 {q2}, [r6], lr
254 vst1.8 {q0}, [r5], lr
255 vst1.8 {q2}, [r6], lr
256 vst1.8 {q0}, [r5], lr
257 vst1.8 {q2}, [r6], lr
258 vst1.8 {q0}, [r5], lr
259 vst1.8 {q2}, [r6], lr
260 vst1.8 {q0}, [r5], lr
261 vst1.8 {q2}, [r6], lr
262 vst1.8 {q0}, [r5], lr
263 vst1.8 {q2}, [r6], lr
264 bne extra_top_bottom_32
265
266 sub r5, r1, lr, asl #5 ; src_ptr1 - (Border * plane_stride)
267 add r6, r2, lr ; src_ptr2 + plane_stride
268 subs r7, r7, #1
269 bne extra_top_bottom_y
270
271 b end_of_border_copy_y
272
; Out-of-line tail for the U/V planes, entered with r7 = number of
; leftover 8-byte columns. Each pass of extra_top_bottom_uv copies one
; 8-byte column of the top row (d0) and bottom row (d8) into all 16
; border rows (2 iterations x 8 unrolled row stores).
273 extra_top_bottom_uv
274 vld1.8 {d0}, [r1]!
275 vld1.8 {d8}, [r2]!
276
277 mov r9, #2 ; 16 >> 3
278
279 extra_top_bottom_16
280 subs r9, r9, #1
281
282 vst1.8 {d0}, [r5], lr
283 vst1.8 {d8}, [r6], lr
284 vst1.8 {d0}, [r5], lr
285 vst1.8 {d8}, [r6], lr
286 vst1.8 {d0}, [r5], lr
287 vst1.8 {d8}, [r6], lr
288 vst1.8 {d0}, [r5], lr
289 vst1.8 {d8}, [r6], lr
290 vst1.8 {d0}, [r5], lr
291 vst1.8 {d8}, [r6], lr
292 vst1.8 {d0}, [r5], lr
293 vst1.8 {d8}, [r6], lr
294 vst1.8 {d0}, [r5], lr
295 vst1.8 {d8}, [r6], lr
296 vst1.8 {d0}, [r5], lr
297 vst1.8 {d8}, [r6], lr
298 bne extra_top_bottom_16
299
300 sub r5, r1, lr, asl #4 ; src_ptr1 - (Border * plane_stride)
301 add r6, r2, lr ; src_ptr2 + plane_stride
302 subs r7, r7, #1
303 bne extra_top_bottom_uv
304
305 b end_of_border_copy_uv
306
307 ENDP
308 END
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698