Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Side by Side Diff: source/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: '' Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 11
12 EXPORT |vp8_yv12_extend_frame_borders_neon| 12 EXPORT |vp8_yv12_extend_frame_borders_neon|
13 ARM 13 ARM
14 REQUIRE8 14 REQUIRE8
15 PRESERVE8 15 PRESERVE8
16 16
17 INCLUDE asm_com_offsets.asm 17 INCLUDE asm_com_offsets.asm
18 18
19 AREA ||.text||, CODE, READONLY, ALIGN=2 19 AREA ||.text||, CODE, READONLY, ALIGN=2
20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf); 20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);
21 ;Note: this is VP8 function, which has border=32 and 16. Internal y_width and y_height 21 ; we depend on VP8BORDERINPIXELS being 32
22 ; are always multiples of 16.
23 22
24 |vp8_yv12_extend_frame_borders_neon| PROC 23 |vp8_yv12_extend_frame_borders_neon| PROC
25 push {r4 - r10, lr} 24 push {r4 - r10, lr}
26 vpush {d8 - d15} 25 vpush {d8 - d15}
27 26
28 ;Not need to load y_width, since: y_width = y_stride - 2*border 27 ; Border = 32
29 ldr r3, [r0, #yv12_buffer_config_border] 28 ldr r3, [r0, #yv12_buffer_config_y_width] ; plane_width
30 ldr r1, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 29 ldr r1, [r0, #yv12_buffer_config_y_buffer] ; src_ptr1
31 ldr r4, [r0, #yv12_buffer_config_y_height] 30 ldr r4, [r0, #yv12_buffer_config_y_height] ; plane_height
32 ldr lr, [r0, #yv12_buffer_config_y_stride] 31 ldr lr, [r0, #yv12_buffer_config_y_stride] ; plane_stride
33 32
34 cmp r3, #16 33 ; Border copy for Y plane
35 beq b16_extend_frame_borders 34 ; copy the left and right most columns out
35 add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width)
36 sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width - 1
37 sub r5, r1, #32 ; dest_ptr1 = src_ptr1 - Border
36 38
37 ;======================= 39 mov r12, r4, lsr #2 ; plane_height / 4
38 b32_extend_frame_borders
39 ;border = 32
40 ;=======================
41 ;Border copy for Y plane
42 ;copy the left and right most columns out
43 sub r5, r1, r3 ;destptr1
44 add r6, r1, lr
45 sub r6, r6, r3, lsl #1 ;destptr2
46 sub r2, r6, #1 ;srcptr2
47
48 ;Do four rows at one time
49 mov r12, r4, lsr #2
50 40
51 copy_left_right_y 41 copy_left_right_y
52 vld1.8 {d0[], d1[]}, [r1], lr 42 vld1.8 {d0[], d1[]}, [r1], lr
53 vld1.8 {d4[], d5[]}, [r2], lr 43 vld1.8 {d4[], d5[]}, [r2], lr
54 vld1.8 {d8[], d9[]}, [r1], lr 44 vld1.8 {d8[], d9[]}, [r1], lr
55 vld1.8 {d12[], d13[]}, [r2], lr 45 vld1.8 {d12[], d13[]}, [r2], lr
56 vld1.8 {d16[], d17[]}, [r1], lr 46 vld1.8 {d16[], d17[]}, [r1], lr
57 vld1.8 {d20[], d21[]}, [r2], lr 47 vld1.8 {d20[], d21[]}, [r2], lr
58 vld1.8 {d24[], d25[]}, [r1], lr 48 vld1.8 {d24[], d25[]}, [r1], lr
59 vld1.8 {d28[], d29[]}, [r2], lr 49 vld1.8 {d28[], d29[]}, [r2], lr
60 50
61 vmov q1, q0 51 vmov q1, q0
62 vmov q3, q2 52 vmov q3, q2
63 vmov q5, q4 53 vmov q5, q4
64 vmov q7, q6 54 vmov q7, q6
65 vmov q9, q8 55 vmov q9, q8
66 vmov q11, q10 56 vmov q11, q10
67 vmov q13, q12 57 vmov q13, q12
68 vmov q15, q14 58 vmov q15, q14
69 59
70 subs r12, r12, #1 60 subs r12, r12, #1
71 61
72 vst1.8 {q0, q1}, [r5], lr 62 vst1.8 {q0, q1}, [r5], lr
73 vst1.8 {q2, q3}, [r6], lr 63 vst1.8 {q2, q3}, [r6], lr
74 vst1.8 {q4, q5}, [r5], lr 64 vst1.8 {q4, q5}, [r5], lr
75 vst1.8 {q6, q7}, [r6], lr 65 vst1.8 {q6, q7}, [r6], lr
76 vst1.8 {q8, q9}, [r5], lr 66 vst1.8 {q8, q9}, [r5], lr
77 vst1.8 {q10, q11}, [r6], lr 67 vst1.8 {q10, q11}, [r6], lr
78 vst1.8 {q12, q13}, [r5], lr 68 vst1.8 {q12, q13}, [r5], lr
79 vst1.8 {q14, q15}, [r6], lr 69 vst1.8 {q14, q15}, [r6], lr
80 70
81 bne copy_left_right_y 71 bne copy_left_right_y
82 72
83 ;Now copy the top and bottom source lines into each line of the respective borders 73 ;Now copy the top and bottom source lines into each line of the respective borders
84 ldr r7, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 74 ldr r1, [r0, #yv12_buffer_config_y_buffer] ; y_buffer
85 mul r8, r3, lr 75 mul r8, r4, lr ; plane_height * plane_stride
86 76
87 mov r12, lr, lsr #7 77 ; copy width is plane_stride
78 mov r12, lr, lsr #7 ; plane_stride / 128
88 79
89 sub r6, r1, r3 ;destptr2 80 sub r1, r1, #32 ; src_ptr1 = y_buffer - Border
90 sub r2, r6, lr ;srcptr2 81 add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride))
91 sub r1, r7, r3 ;srcptr1 82 sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
92 sub r5, r1, r8 ;destptr1 83 sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
93 84
94 copy_top_bottom_y 85 copy_top_bottom_y
95 vld1.8 {q0, q1}, [r1]! 86 vld1.8 {q0, q1}, [r1]!
96 vld1.8 {q8, q9}, [r2]! 87 vld1.8 {q8, q9}, [r2]!
97 vld1.8 {q2, q3}, [r1]! 88 vld1.8 {q2, q3}, [r1]!
98 vld1.8 {q10, q11}, [r2]! 89 vld1.8 {q10, q11}, [r2]!
99 vld1.8 {q4, q5}, [r1]! 90 vld1.8 {q4, q5}, [r1]!
100 vld1.8 {q12, q13}, [r2]! 91 vld1.8 {q12, q13}, [r2]!
101 vld1.8 {q6, q7}, [r1]! 92 vld1.8 {q6, q7}, [r1]!
102 vld1.8 {q14, q15}, [r2]! 93 vld1.8 {q14, q15}, [r2]!
103 94
104 mov r7, r3 95 mov r7, #32 ; Border
105 96
106 top_bottom_32 97 top_bottom_32
107 subs r7, r7, #1 98 subs r7, r7, #1
108 99
109 vst1.8 {q0, q1}, [r5]! 100 vst1.8 {q0, q1}, [r5]!
110 vst1.8 {q8, q9}, [r6]! 101 vst1.8 {q8, q9}, [r6]!
111 vst1.8 {q2, q3}, [r5]! 102 vst1.8 {q2, q3}, [r5]!
112 vst1.8 {q10, q11}, [r6]! 103 vst1.8 {q10, q11}, [r6]!
113 vst1.8 {q4, q5}, [r5]! 104 vst1.8 {q4, q5}, [r5]!
114 vst1.8 {q12, q13}, [r6]! 105 vst1.8 {q12, q13}, [r6]!
115 vst1.8 {q6, q7}, [r5]! 106 vst1.8 {q6, q7}, [r5]!
116 vst1.8 {q14, q15}, [r6]! 107 vst1.8 {q14, q15}, [r6]!
117 108
118 add r5, r5, lr 109 add r5, r5, lr ; dest_ptr1 += plane_stride
119 sub r5, r5, #128 110 sub r5, r5, #128 ; dest_ptr1 -= 128
120 add r6, r6, lr 111 add r6, r6, lr ; dest_ptr2 += plane_stride
121 sub r6, r6, #128 112 sub r6, r6, #128 ; dest_ptr2 -= 128
122 113
123 bne top_bottom_32 114 bne top_bottom_32
124 115
125 sub r5, r1, r8 116 sub r5, r1, lr, asl #5 ; src_ptr1 - (Border* plane_stride)
126 add r6, r2, lr 117 add r6, r2, lr ; src_ptr2 + plane_stride
127 118
128 subs r12, r12, #1 119 subs r12, r12, #1
129 bne copy_top_bottom_y 120 bne copy_top_bottom_y
130 121
131 mov r7, lr, lsr #4 ;check to see if extra copy is needed 122 mov r7, lr, lsr #4 ; check to see if extra copy is needed
132 ands r7, r7, #0x7 123 ands r7, r7, #0x7
133 bne extra_top_bottom_y 124 bne extra_top_bottom_y
134 end_of_border_copy_y 125 end_of_border_copy_y
135 126
136 ;Border copy for U, V planes 127 ;Border copy for U, V planes
137 ldr r1, [r0, #yv12_buffer_config_u_buffer] ;srcptr1 128 ; Border = 16
138 mov lr, lr, lsr #1 ;uv_stride 129 ldr r7, [r0, #yv12_buffer_config_u_buffer] ; src_ptr1
139 mov r3, r3, lsr #1 ;border 130 ldr lr, [r0, #yv12_buffer_config_uv_stride] ; plane_stride
140 mov r4, r4, lsr #1 ;uv_height 131 ldr r3, [r0, #yv12_buffer_config_uv_width] ; plane_width
141 mov r8, r8, lsr #2 132 ldr r4, [r0, #yv12_buffer_config_uv_height] ; plane_height
142 133
143 mov r10, #2 134 mov r10, #2
144 135
145 ;copy the left and right most columns out 136 ;copy the left and right most columns out
146 border_copy_uv 137 border_copy_uv
147 sub r5, r1, r3 ;destptr1 138 mov r1, r7 ; src_ptr1 needs to be saved for second half of loop
148 add r6, r1, lr 139 sub r5, r1, #16 ; dest_ptr1 = src_ptr1 - Border
149 sub r6, r6, r3, lsl #1 ;destptr2 140 add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width)
150 sub r2, r6, #1 ;srcptr2 141 sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width - 1
151 142
152 mov r7, r1 143 mov r12, r4, lsr #3 ; plane_height / 8
153
154 ;Do eight rows at one time
155 mov r12, r4, lsr #3
156 144
157 copy_left_right_uv 145 copy_left_right_uv
158 vld1.8 {d0[], d1[]}, [r1], lr 146 vld1.8 {d0[], d1[]}, [r1], lr
159 vld1.8 {d2[], d3[]}, [r2], lr 147 vld1.8 {d2[], d3[]}, [r2], lr
160 vld1.8 {d4[], d5[]}, [r1], lr 148 vld1.8 {d4[], d5[]}, [r1], lr
161 vld1.8 {d6[], d7[]}, [r2], lr 149 vld1.8 {d6[], d7[]}, [r2], lr
162 vld1.8 {d8[], d9[]}, [r1], lr 150 vld1.8 {d8[], d9[]}, [r1], lr
163 vld1.8 {d10[], d11[]}, [r2], lr 151 vld1.8 {d10[], d11[]}, [r2], lr
164 vld1.8 {d12[], d13[]}, [r1], lr 152 vld1.8 {d12[], d13[]}, [r1], lr
165 vld1.8 {d14[], d15[]}, [r2], lr 153 vld1.8 {d14[], d15[]}, [r2], lr
166 vld1.8 {d16[], d17[]}, [r1], lr 154 vld1.8 {d16[], d17[]}, [r1], lr
167 vld1.8 {d18[], d19[]}, [r2], lr 155 vld1.8 {d18[], d19[]}, [r2], lr
168 vld1.8 {d20[], d21[]}, [r1], lr 156 vld1.8 {d20[], d21[]}, [r1], lr
169 vld1.8 {d22[], d23[]}, [r2], lr 157 vld1.8 {d22[], d23[]}, [r2], lr
170 vld1.8 {d24[], d25[]}, [r1], lr 158 vld1.8 {d24[], d25[]}, [r1], lr
171 vld1.8 {d26[], d27[]}, [r2], lr 159 vld1.8 {d26[], d27[]}, [r2], lr
172 vld1.8 {d28[], d29[]}, [r1], lr 160 vld1.8 {d28[], d29[]}, [r1], lr
173 vld1.8 {d30[], d31[]}, [r2], lr 161 vld1.8 {d30[], d31[]}, [r2], lr
174 162
175 subs r12, r12, #1 163 subs r12, r12, #1
176 164
177 vst1.8 {q0}, [r5], lr 165 vst1.8 {q0}, [r5], lr
178 vst1.8 {q1}, [r6], lr 166 vst1.8 {q1}, [r6], lr
179 vst1.8 {q2}, [r5], lr 167 vst1.8 {q2}, [r5], lr
180 vst1.8 {q3}, [r6], lr 168 vst1.8 {q3}, [r6], lr
181 vst1.8 {q4}, [r5], lr 169 vst1.8 {q4}, [r5], lr
182 vst1.8 {q5}, [r6], lr 170 vst1.8 {q5}, [r6], lr
183 vst1.8 {q6}, [r5], lr 171 vst1.8 {q6}, [r5], lr
184 vst1.8 {q7}, [r6], lr 172 vst1.8 {q7}, [r6], lr
185 vst1.8 {q8}, [r5], lr 173 vst1.8 {q8}, [r5], lr
186 vst1.8 {q9}, [r6], lr 174 vst1.8 {q9}, [r6], lr
187 vst1.8 {q10}, [r5], lr 175 vst1.8 {q10}, [r5], lr
188 vst1.8 {q11}, [r6], lr 176 vst1.8 {q11}, [r6], lr
189 vst1.8 {q12}, [r5], lr 177 vst1.8 {q12}, [r5], lr
190 vst1.8 {q13}, [r6], lr 178 vst1.8 {q13}, [r6], lr
191 vst1.8 {q14}, [r5], lr 179 vst1.8 {q14}, [r5], lr
192 vst1.8 {q15}, [r6], lr 180 vst1.8 {q15}, [r6], lr
193 181
194 bne copy_left_right_uv 182 bne copy_left_right_uv
195 183
196 ;Now copy the top and bottom source lines into each line of the respective borders 184 ;Now copy the top and bottom source lines into each line of the respective borders
197 mov r12, lr, lsr #6 185 mov r1, r7
186 mul r8, r4, lr ; plane_height * plane_stride
187 mov r12, lr, lsr #6 ; plane_stride / 64
198 188
199 sub r6, r1, r3 ;destptr2 189 sub r1, r1, #16 ; src_ptr1 = u_buffer - Border
200 sub r2, r6, lr ;srcptr2 190 add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride))
201 sub r1, r7, r3 ;srcptr1 191 sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
202 sub r5, r1, r8 ;destptr1 192 sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
203 193
204 copy_top_bottom_uv 194 copy_top_bottom_uv
205 vld1.8 {q0, q1}, [r1]! 195 vld1.8 {q0, q1}, [r1]!
206 vld1.8 {q8, q9}, [r2]! 196 vld1.8 {q8, q9}, [r2]!
207 vld1.8 {q2, q3}, [r1]! 197 vld1.8 {q2, q3}, [r1]!
208 vld1.8 {q10, q11}, [r2]! 198 vld1.8 {q10, q11}, [r2]!
209 199
210 mov r7, r3 200 mov r7, #16 ; Border
211 201
212 top_bottom_16 202 top_bottom_16
213 subs r7, r7, #1 203 subs r7, r7, #1
214 204
215 vst1.8 {q0, q1}, [r5]! 205 vst1.8 {q0, q1}, [r5]!
216 vst1.8 {q8, q9}, [r6]! 206 vst1.8 {q8, q9}, [r6]!
217 vst1.8 {q2, q3}, [r5]! 207 vst1.8 {q2, q3}, [r5]!
218 vst1.8 {q10, q11}, [r6]! 208 vst1.8 {q10, q11}, [r6]!
219 209
220 add r5, r5, lr 210 add r5, r5, lr ; dest_ptr1 += plane_stride
221 sub r5, r5, #64 211 sub r5, r5, #64
222 add r6, r6, lr 212 add r6, r6, lr ; dest_ptr2 += plane_stride
223 sub r6, r6, #64 213 sub r6, r6, #64
224 214
225 bne top_bottom_16 215 bne top_bottom_16
226 216
227 sub r5, r1, r8 217 sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
228 add r6, r2, lr 218 add r6, r2, lr ; dest_ptr2 = src_ptr2 + plane_stride
229 219
230 subs r12, r12, #1 220 subs r12, r12, #1
231 bne copy_top_bottom_uv 221 bne copy_top_bottom_uv
232 222
233 mov r7, lr, lsr #3 ;check to see if extra copy is needed 223 mov r7, lr, lsr #3 ; check to see if extra copy is needed
234 ands r7, r7, #0x7 224 ands r7, r7, #0x7
235 bne extra_top_bottom_uv 225 bne extra_top_bottom_uv
236 226
237 end_of_border_copy_uv 227 end_of_border_copy_uv
238 subs r10, r10, #1 228 subs r10, r10, #1
239 ldrne r1, [r0, #yv12_buffer_config_v_buffer] ;srcptr1 229 ldrne r7, [r0, #yv12_buffer_config_v_buffer] ; src_ptr1
240 bne border_copy_uv 230 bne border_copy_uv
241 231
242 vpop {d8 - d15} 232 vpop {d8 - d15}
243 pop {r4 - r10, pc} 233 pop {r4 - r10, pc}
244 234
245 ;;;;;;;;;;;;;;;;;;;;;; 235 ;;;;;;;;;;;;;;;;;;;;;;
246 ;extra copy part for Y
247 extra_top_bottom_y 236 extra_top_bottom_y
248 vld1.8 {q0}, [r1]! 237 vld1.8 {q0}, [r1]!
249 vld1.8 {q2}, [r2]! 238 vld1.8 {q2}, [r2]!
250 239
251 mov r9, r3, lsr #3 240 mov r9, #4 ; 32 >> 3
252 241
253 extra_top_bottom_32 242 extra_top_bottom_32
254 subs r9, r9, #1 243 subs r9, r9, #1
255 244
256 vst1.8 {q0}, [r5], lr 245 vst1.8 {q0}, [r5], lr
257 vst1.8 {q2}, [r6], lr 246 vst1.8 {q2}, [r6], lr
258 vst1.8 {q0}, [r5], lr 247 vst1.8 {q0}, [r5], lr
259 vst1.8 {q2}, [r6], lr 248 vst1.8 {q2}, [r6], lr
260 vst1.8 {q0}, [r5], lr 249 vst1.8 {q0}, [r5], lr
261 vst1.8 {q2}, [r6], lr 250 vst1.8 {q2}, [r6], lr
262 vst1.8 {q0}, [r5], lr 251 vst1.8 {q0}, [r5], lr
263 vst1.8 {q2}, [r6], lr 252 vst1.8 {q2}, [r6], lr
264 vst1.8 {q0}, [r5], lr 253 vst1.8 {q0}, [r5], lr
265 vst1.8 {q2}, [r6], lr 254 vst1.8 {q2}, [r6], lr
266 vst1.8 {q0}, [r5], lr 255 vst1.8 {q0}, [r5], lr
267 vst1.8 {q2}, [r6], lr 256 vst1.8 {q2}, [r6], lr
268 vst1.8 {q0}, [r5], lr 257 vst1.8 {q0}, [r5], lr
269 vst1.8 {q2}, [r6], lr 258 vst1.8 {q2}, [r6], lr
270 vst1.8 {q0}, [r5], lr 259 vst1.8 {q0}, [r5], lr
271 vst1.8 {q2}, [r6], lr 260 vst1.8 {q2}, [r6], lr
272 bne extra_top_bottom_32 261 bne extra_top_bottom_32
273 262
274 sub r5, r1, r8 263 sub r5, r1, lr, asl #5 ; src_ptr1 - (Border * plane_stride)
275 add r6, r2, lr 264 add r6, r2, lr ; src_ptr2 + plane_stride
276 subs r7, r7, #1 265 subs r7, r7, #1
277 bne extra_top_bottom_y 266 bne extra_top_bottom_y
278 267
279 b end_of_border_copy_y 268 b end_of_border_copy_y
280 269
281 ;extra copy part for UV
282 extra_top_bottom_uv 270 extra_top_bottom_uv
283 vld1.8 {d0}, [r1]! 271 vld1.8 {d0}, [r1]!
284 vld1.8 {d8}, [r2]! 272 vld1.8 {d8}, [r2]!
285 273
286 mov r9, r3, lsr #3 274 mov r9, #2 ; 16 >> 3
287 275
288 extra_top_bottom_16 276 extra_top_bottom_16
289 subs r9, r9, #1 277 subs r9, r9, #1
290 278
291 vst1.8 {d0}, [r5], lr 279 vst1.8 {d0}, [r5], lr
292 vst1.8 {d8}, [r6], lr 280 vst1.8 {d8}, [r6], lr
293 vst1.8 {d0}, [r5], lr 281 vst1.8 {d0}, [r5], lr
294 vst1.8 {d8}, [r6], lr 282 vst1.8 {d8}, [r6], lr
295 vst1.8 {d0}, [r5], lr 283 vst1.8 {d0}, [r5], lr
296 vst1.8 {d8}, [r6], lr 284 vst1.8 {d8}, [r6], lr
297 vst1.8 {d0}, [r5], lr 285 vst1.8 {d0}, [r5], lr
298 vst1.8 {d8}, [r6], lr 286 vst1.8 {d8}, [r6], lr
299 vst1.8 {d0}, [r5], lr 287 vst1.8 {d0}, [r5], lr
300 vst1.8 {d8}, [r6], lr 288 vst1.8 {d8}, [r6], lr
301 vst1.8 {d0}, [r5], lr 289 vst1.8 {d0}, [r5], lr
302 vst1.8 {d8}, [r6], lr 290 vst1.8 {d8}, [r6], lr
303 vst1.8 {d0}, [r5], lr 291 vst1.8 {d0}, [r5], lr
304 vst1.8 {d8}, [r6], lr 292 vst1.8 {d8}, [r6], lr
305 vst1.8 {d0}, [r5], lr 293 vst1.8 {d0}, [r5], lr
306 vst1.8 {d8}, [r6], lr 294 vst1.8 {d8}, [r6], lr
307 bne extra_top_bottom_16 295 bne extra_top_bottom_16
308 296
309 sub r5, r1, r8 297 sub r5, r1, lr, asl #4 ; src_ptr1 - (Border * plane_stride)
310 add r6, r2, lr 298 add r6, r2, lr ; src_ptr2 + plane_stride
311 subs r7, r7, #1 299 subs r7, r7, #1
312 bne extra_top_bottom_uv 300 bne extra_top_bottom_uv
313 301
314 b end_of_border_copy_uv 302 b end_of_border_copy_uv
315 303
316
317 ;=======================
318 b16_extend_frame_borders
319 ;border = 16
320 ;=======================
321 ;Border copy for Y plane
322 ;copy the left and right most columns out
323 sub r5, r1, r3 ;destptr1
324 add r6, r1, lr
325 sub r6, r6, r3, lsl #1 ;destptr2
326 sub r2, r6, #1 ;srcptr2
327
328 ;Do four rows at one time
329 mov r12, r4, lsr #2
330
331 copy_left_right_y_b16
332 vld1.8 {d0[], d1[]}, [r1], lr
333 vld1.8 {d4[], d5[]}, [r2], lr
334 vld1.8 {d8[], d9[]}, [r1], lr
335 vld1.8 {d12[], d13[]}, [r2], lr
336 vld1.8 {d16[], d17[]}, [r1], lr
337 vld1.8 {d20[], d21[]}, [r2], lr
338 vld1.8 {d24[], d25[]}, [r1], lr
339 vld1.8 {d28[], d29[]}, [r2], lr
340
341 subs r12, r12, #1
342
343 vst1.8 {q0}, [r5], lr
344 vst1.8 {q2}, [r6], lr
345 vst1.8 {q4}, [r5], lr
346 vst1.8 {q6}, [r6], lr
347 vst1.8 {q8}, [r5], lr
348 vst1.8 {q10}, [r6], lr
349 vst1.8 {q12}, [r5], lr
350 vst1.8 {q14}, [r6], lr
351
352 bne copy_left_right_y_b16
353
354 ;Now copy the top and bottom source lines into each line of the respective borders
355 ldr r7, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
356 mul r8, r3, lr
357
358 mov r12, lr, lsr #7
359
360 sub r6, r1, r3 ;destptr2
361 sub r2, r6, lr ;srcptr2
362 sub r1, r7, r3 ;srcptr1
363 sub r5, r1, r8 ;destptr1
364
365 copy_top_bottom_y_b16
366 vld1.8 {q0, q1}, [r1]!
367 vld1.8 {q8, q9}, [r2]!
368 vld1.8 {q2, q3}, [r1]!
369 vld1.8 {q10, q11}, [r2]!
370 vld1.8 {q4, q5}, [r1]!
371 vld1.8 {q12, q13}, [r2]!
372 vld1.8 {q6, q7}, [r1]!
373 vld1.8 {q14, q15}, [r2]!
374
375 mov r7, r3
376
377 top_bottom_16_b16
378 subs r7, r7, #1
379
380 vst1.8 {q0, q1}, [r5]!
381 vst1.8 {q8, q9}, [r6]!
382 vst1.8 {q2, q3}, [r5]!
383 vst1.8 {q10, q11}, [r6]!
384 vst1.8 {q4, q5}, [r5]!
385 vst1.8 {q12, q13}, [r6]!
386 vst1.8 {q6, q7}, [r5]!
387 vst1.8 {q14, q15}, [r6]!
388
389 add r5, r5, lr
390 sub r5, r5, #128
391 add r6, r6, lr
392 sub r6, r6, #128
393
394 bne top_bottom_16_b16
395
396 sub r5, r1, r8
397 add r6, r2, lr
398
399 subs r12, r12, #1
400 bne copy_top_bottom_y_b16
401
402 mov r7, lr, lsr #4 ;check to see if extra copy is needed
403 ands r7, r7, #0x7
404 bne extra_top_bottom_y_b16
405 end_of_border_copy_y_b16
406
407 ;Border copy for U, V planes
408 ldr r1, [r0, #yv12_buffer_config_u_buffer] ;srcptr1
409 mov lr, lr, lsr #1 ;uv_stride
410 mov r3, r3, lsr #1 ;border
411 mov r4, r4, lsr #1 ;uv_height
412 mov r8, r8, lsr #2
413
414 mov r10, #2
415
416 ;copy the left and right most columns out
417 border_copy_uv_b16
418 sub r5, r1, r3 ;destptr1
419 add r6, r1, lr
420 sub r6, r6, r3, lsl #1 ;destptr2
421 sub r2, r6, #1 ;srcptr2
422
423 mov r7, r1
424
425 ;Do eight rows at one time
426 mov r12, r4, lsr #3
427
428 copy_left_right_uv_b16
429 vld1.8 {d0[]}, [r1], lr
430 vld1.8 {d2[]}, [r2], lr
431 vld1.8 {d4[]}, [r1], lr
432 vld1.8 {d6[]}, [r2], lr
433 vld1.8 {d8[]}, [r1], lr
434 vld1.8 {d10[]}, [r2], lr
435 vld1.8 {d12[]}, [r1], lr
436 vld1.8 {d14[]}, [r2], lr
437 vld1.8 {d16[]}, [r1], lr
438 vld1.8 {d18[]}, [r2], lr
439 vld1.8 {d20[]}, [r1], lr
440 vld1.8 {d22[]}, [r2], lr
441 vld1.8 {d24[]}, [r1], lr
442 vld1.8 {d26[]}, [r2], lr
443 vld1.8 {d28[]}, [r1], lr
444 vld1.8 {d30[]}, [r2], lr
445
446 subs r12, r12, #1
447
448 vst1.8 {d0}, [r5], lr
449 vst1.8 {d2}, [r6], lr
450 vst1.8 {d4}, [r5], lr
451 vst1.8 {d6}, [r6], lr
452 vst1.8 {d8}, [r5], lr
453 vst1.8 {d10}, [r6], lr
454 vst1.8 {d12}, [r5], lr
455 vst1.8 {d14}, [r6], lr
456 vst1.8 {d16}, [r5], lr
457 vst1.8 {d18}, [r6], lr
458 vst1.8 {d20}, [r5], lr
459 vst1.8 {d22}, [r6], lr
460 vst1.8 {d24}, [r5], lr
461 vst1.8 {d26}, [r6], lr
462 vst1.8 {d28}, [r5], lr
463 vst1.8 {d30}, [r6], lr
464
465 bne copy_left_right_uv_b16
466
467 ;Now copy the top and bottom source lines into each line of the respective borders
468 mov r12, lr, lsr #6
469
470 sub r6, r1, r3 ;destptr2
471 sub r2, r6, lr ;srcptr2
472 sub r1, r7, r3 ;srcptr1
473 sub r5, r1, r8 ;destptr1
474
475 copy_top_bottom_uv_b16
476 vld1.8 {q0, q1}, [r1]!
477 vld1.8 {q8, q9}, [r2]!
478 vld1.8 {q2, q3}, [r1]!
479 vld1.8 {q10, q11}, [r2]!
480
481 mov r7, r3
482
483 top_bottom_8_b16
484 subs r7, r7, #1
485
486 vst1.8 {q0, q1}, [r5]!
487 vst1.8 {q8, q9}, [r6]!
488 vst1.8 {q2, q3}, [r5]!
489 vst1.8 {q10, q11}, [r6]!
490
491 add r5, r5, lr
492 sub r5, r5, #64
493 add r6, r6, lr
494 sub r6, r6, #64
495
496 bne top_bottom_8_b16
497
498 sub r5, r1, r8
499 add r6, r2, lr
500
501 subs r12, r12, #1
502 bne copy_top_bottom_uv_b16
503
504 mov r7, lr, lsr #3 ;check to see if extra copy is needed
505 ands r7, r7, #0x7
506 bne extra_top_bottom_uv_b16
507
508 end_of_border_copy_uv_b16
509 subs r10, r10, #1
510 ldrne r1, [r0, #yv12_buffer_config_v_buffer] ;srcptr1
511 bne border_copy_uv_b16
512
513 vpop {d8-d15}
514 pop {r4 - r10, pc}
515
516 ;;;;;;;;;;;;;;;;;;;;;;
517 ;extra copy part for Y
518 extra_top_bottom_y_b16
519 vld1.8 {q0}, [r1]!
520 vld1.8 {q2}, [r2]!
521
522 mov r9, r3, lsr #3
523
524 extra_top_bottom_16_b16
525 subs r9, r9, #1
526
527 vst1.8 {q0}, [r5], lr
528 vst1.8 {q2}, [r6], lr
529 vst1.8 {q0}, [r5], lr
530 vst1.8 {q2}, [r6], lr
531 vst1.8 {q0}, [r5], lr
532 vst1.8 {q2}, [r6], lr
533 vst1.8 {q0}, [r5], lr
534 vst1.8 {q2}, [r6], lr
535 vst1.8 {q0}, [r5], lr
536 vst1.8 {q2}, [r6], lr
537 vst1.8 {q0}, [r5], lr
538 vst1.8 {q2}, [r6], lr
539 vst1.8 {q0}, [r5], lr
540 vst1.8 {q2}, [r6], lr
541 vst1.8 {q0}, [r5], lr
542 vst1.8 {q2}, [r6], lr
543 bne extra_top_bottom_16_b16
544
545 sub r5, r1, r8
546 add r6, r2, lr
547 subs r7, r7, #1
548 bne extra_top_bottom_y_b16
549
550 b end_of_border_copy_y_b16
551
552 ;extra copy part for UV
553 extra_top_bottom_uv_b16
554 vld1.8 {d0}, [r1]!
555 vld1.8 {d8}, [r2]!
556
557 mov r9, r3, lsr #3
558
559 extra_top_bottom_8_b16
560 subs r9, r9, #1
561
562 vst1.8 {d0}, [r5], lr
563 vst1.8 {d8}, [r6], lr
564 vst1.8 {d0}, [r5], lr
565 vst1.8 {d8}, [r6], lr
566 vst1.8 {d0}, [r5], lr
567 vst1.8 {d8}, [r6], lr
568 vst1.8 {d0}, [r5], lr
569 vst1.8 {d8}, [r6], lr
570 vst1.8 {d0}, [r5], lr
571 vst1.8 {d8}, [r6], lr
572 vst1.8 {d0}, [r5], lr
573 vst1.8 {d8}, [r6], lr
574 vst1.8 {d0}, [r5], lr
575 vst1.8 {d8}, [r6], lr
576 vst1.8 {d0}, [r5], lr
577 vst1.8 {d8}, [r6], lr
578 bne extra_top_bottom_8_b16
579
580 sub r5, r1, r8
581 add r6, r2, lr
582 subs r7, r7, #1
583 bne extra_top_bottom_uv_b16
584
585 b end_of_border_copy_uv_b16
586
587 ENDP 304 ENDP
588 END 305 END
OLDNEW
« no previous file with comments | « source/libvpx/vpx_scale/arm/nds/yv12extend.c ('k') | source/libvpx/vpx_scale/generic/yv12config.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698