Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(563)

Side by Side Diff: source/libvpx/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm

Issue 554673004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 EXPORT |vp8_build_intra_predictors_mby_neon_func|
13 EXPORT |vp8_build_intra_predictors_mby_s_neon_func|
14
15 ARM
16 REQUIRE8
17 PRESERVE8
18
19 AREA ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------
; void vp8_build_intra_predictors_mby_neon_func(unsigned char *y_buffer,
;                                               unsigned char *ypred_ptr,
;                                               int y_stride,
;                                               int mode,
;                                               int Up,
;                                               int Left)
;
; Builds a 16x16 prediction block from the pixels bordering y_buffer and
; writes it to ypred_ptr as 16 consecutive 16-byte rows (256 bytes).
;
; Syntax: armasm, ARM state, NEON.
; In:    r0        y_buffer   first pixel of the block; the row above is
;                             read at y_buffer - y_stride and the left
;                             column at y_buffer - 1 + n * y_stride
;        r1        ypred_ptr  destination of the 256 predicted bytes
;        r2        y_stride   byte distance between rows of y_buffer
;        r3        mode       1 = vertical, 2 = horizontal, 3 = TM;
;                             0 and every other value select DC
;        [sp]      Up         DC only: non-zero -> sum the above row
;        [sp, #4]  Left       DC only: non-zero -> sum the left column
; Out:   nothing in registers
; Clobb: r0, r1, r3, r12, q0-q3, q8-q15, flags
;        (r4-r8 and d8-d15 are used but saved/restored here)
;
; The DC shift is 3 + Up + Left, so the result is a rounded average only
; when each flag is 0 or 1.
;-----------------------------------------------------------------------

|vp8_build_intra_predictors_mby_neon_func| PROC
    push            {r4-r8, lr}
    vpush           {d8-d15}

    ; Mode dispatch. DC needs no compare of its own: it is the
    ; fall-through for mode 0 and for any value not matched below.
    cmp             r3, #1
    beq             case_v_pred
    cmp             r3, #2
    beq             case_h_pred
    cmp             r3, #3
    beq             case_tm_pred

case_dc_pred
    ; 6 core registers (24 bytes) plus d8-d15 (64 bytes) were pushed, so
    ; the stack arguments now live 88 bytes above sp.
    ldr             r4, [sp, #88]           ; Up
    ldr             r5, [sp, #92]           ; Left

    ; With no neighbours at all the prediction is a flat 128.
    mov             r12, #128
    vdup.u8         q0, r12

    mov             r12, #0                 ; r12 = running pixel sum

    adds            r7, r4, r5              ; r7 = Up + Left
    beq             fill_pred_rows          ; neither edge: keep the 128

    cmp             r4, #0
    beq             skip_dc_pred_up

    ; Sum the 16 pixels of the above row: pairwise widen 8 -> 16 -> 32
    ; -> 64 bits, then add the two remaining halves in core registers.
    sub             r6, r0, r2
    vld1.8          {q1}, [r6]
    vpaddl.u8       q2, q1
    vpaddl.u16      q3, q2
    vpaddl.u32      q4, q3

    vmov.32         r4, d8[0]
    vmov.32         r6, d9[0]
    add             r12, r4, r6

skip_dc_pred_up
    cmp             r5, #0
    beq             skip_dc_pred_left

    ; Add the 16 pixels of the left column, four rows per iteration.
    sub             r0, r0, #1              ; r0 -> left neighbour of row 0
    mov             r8, #4

dc_pred_left_loop
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    subs            r8, r8, #1
    bne             dc_pred_left_loop

skip_dc_pred_left
    add             r7, r7, #3              ; shift = 3 + Up + Left
    sub             r4, r7, #1
    mov             r5, #1
    add             r12, r12, r5, lsl r4    ; sum += 1 << (shift - 1)
    mov             r5, r12, lsr r7         ; expected_dc = sum >> shift

    vdup.u8         q0, r5

    ; Shared tail of the DC and vertical paths: q0 holds one finished
    ; row, which is replicated into all 16 output rows.
fill_pred_rows
    mov             r8, #4

fill_pred_rows_loop
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!

    subs            r8, r8, #1
    bne             fill_pred_rows_loop

    vpop            {d8-d15}
    pop             {r4-r8, pc}

case_v_pred
    ; Every output row is a copy of the row above the block.
    sub             r6, r0, r2
    vld1.8          {q0}, [r6]
    b               fill_pred_rows

case_h_pred
    ; Every output row is its left neighbour splatted across 16 bytes.
    sub             r0, r0, #1              ; r0 -> left neighbour of row 0
    mov             r12, #4                 ; 4 iterations x 4 rows

case_h_pred_loop
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6

    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    subs            r12, r12, #1
    bne             case_h_pred_loop

    vpop            {d8-d15}
    pop             {r4-r8, pc}

case_tm_pred
    ; pred[r][c] = clamp(left[r] + above[c] - top_left, 0, 255)

    ; Load yabove_row and ytop_left
    sub             r3, r0, r2
    vld1.8          {q8}, [r3]
    ldrb            r7, [r3, #-1]

    ; q4/q5 = above[0..7] / above[8..15] minus top_left, as signed
    ; 16-bit lanes. These stay constant for all 16 rows.
    vdup.u8         d0, r7
    vsubl.u8        q4, d16, d0
    vsubl.u8        q5, d17, d0

    sub             r0, r0, #1              ; r0 -> left neighbour of row 0
    mov             r12, #4                 ; 4 iterations x 4 rows

case_tm_pred_loop
    ; Load 4x yleft_col and splat each into 16-bit lanes
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    vdup.u16        q0, r3
    vdup.u16        q1, r4
    vdup.u16        q2, r5
    vdup.u16        q3, r6

    ; left + (above - top_left) for both halves of each row
    vqadd.s16       q8, q0, q4
    vqadd.s16       q9, q0, q5

    vqadd.s16       q10, q1, q4
    vqadd.s16       q11, q1, q5

    vqadd.s16       q12, q2, q4
    vqadd.s16       q13, q2, q5

    vqadd.s16       q14, q3, q4
    vqadd.s16       q15, q3, q5

    ; Saturating narrow to unsigned bytes performs the 0..255 clamp
    vqmovun.s16     d0, q8
    vqmovun.s16     d1, q9

    vqmovun.s16     d2, q10
    vqmovun.s16     d3, q11

    vqmovun.s16     d4, q12
    vqmovun.s16     d5, q13

    vqmovun.s16     d6, q14
    vqmovun.s16     d7, q15

    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    subs            r12, r12, #1
    bne             case_tm_pred_loop

    vpop            {d8-d15}
    pop             {r4-r8, pc}

    ENDP
304
;-----------------------------------------------------------------------
; void vp8_build_intra_predictors_mby_s_neon_func(unsigned char *y_buffer,
;                                                 unsigned char *ypred_ptr,
;                                                 int y_stride,
;                                                 int mode,
;                                                 int Up,
;                                                 int Left)
;
; Builds a 16x16 prediction block from the pixels bordering y_buffer and
; writes it in place: 16 rows of 16 bytes starting at y_buffer, with
; consecutive rows y_stride bytes apart.
;
; Syntax: armasm, ARM state, NEON.
; In:    r0        y_buffer   first pixel of the block and destination;
;                             the row above is read at y_buffer - y_stride
;                             and the left column at
;                             y_buffer - 1 + n * y_stride
;        r1        ypred_ptr  ignored: overwritten with y_buffer on entry
;        r2        y_stride   byte distance between rows of y_buffer
;        r3        mode       1 = vertical, 2 = horizontal, 3 = TM;
;                             0 and every other value select DC
;        [sp]      Up         DC only: non-zero -> sum the above row
;        [sp, #4]  Left       DC only: non-zero -> sum the left column
; Out:   nothing in registers
; Clobb: r0, r1, r3, r12, q0-q3, q8-q15, flags
;        (r4-r8 and d8-d15 are used but saved/restored here)
;
; The DC shift is 3 + Up + Left, so the result is a rounded average only
; when each flag is 0 or 1.
;-----------------------------------------------------------------------

|vp8_build_intra_predictors_mby_s_neon_func| PROC
    push            {r4-r8, lr}
    vpush           {d8-d15}

    mov             r1, r0                  ; predict straight into y_buffer

    ; Mode dispatch. DC needs no compare of its own: it is the
    ; fall-through for mode 0 and for any value not matched below.
    cmp             r3, #1
    beq             case_v_pred_s
    cmp             r3, #2
    beq             case_h_pred_s
    cmp             r3, #3
    beq             case_tm_pred_s

case_dc_pred_s
    ; 6 core registers (24 bytes) plus d8-d15 (64 bytes) were pushed, so
    ; the stack arguments now live 88 bytes above sp.
    ldr             r4, [sp, #88]           ; Up
    ldr             r5, [sp, #92]           ; Left

    ; With no neighbours at all the prediction is a flat 128.
    mov             r12, #128
    vdup.u8         q0, r12

    mov             r12, #0                 ; r12 = running pixel sum

    adds            r7, r4, r5              ; r7 = Up + Left
    beq             fill_pred_rows_s        ; neither edge: keep the 128

    cmp             r4, #0
    beq             skip_dc_pred_up_s

    ; Sum the 16 pixels of the above row: pairwise widen 8 -> 16 -> 32
    ; -> 64 bits, then add the two remaining halves in core registers.
    sub             r6, r0, r2
    vld1.8          {q1}, [r6]
    vpaddl.u8       q2, q1
    vpaddl.u16      q3, q2
    vpaddl.u32      q4, q3

    vmov.32         r4, d8[0]
    vmov.32         r6, d9[0]
    add             r12, r4, r6

skip_dc_pred_up_s
    cmp             r5, #0
    beq             skip_dc_pred_left_s

    ; Add the 16 pixels of the left column, four rows per iteration.
    sub             r0, r0, #1              ; r0 -> left neighbour of row 0
    mov             r8, #4

dc_pred_left_loop_s
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    subs            r8, r8, #1
    bne             dc_pred_left_loop_s

skip_dc_pred_left_s
    add             r7, r7, #3              ; shift = 3 + Up + Left
    sub             r4, r7, #1
    mov             r5, #1
    add             r12, r12, r5, lsl r4    ; sum += 1 << (shift - 1)
    mov             r5, r12, lsr r7         ; expected_dc = sum >> shift

    vdup.u8         q0, r5

    ; Shared tail of the DC and vertical paths: q0 holds one finished
    ; row, which is replicated into all 16 output rows.
fill_pred_rows_s
    mov             r8, #4

fill_pred_rows_loop_s
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2

    subs            r8, r8, #1
    bne             fill_pred_rows_loop_s

    vpop            {d8-d15}
    pop             {r4-r8, pc}

case_v_pred_s
    ; Every output row is a copy of the row above the block.
    sub             r6, r0, r2
    vld1.8          {q0}, [r6]
    b               fill_pred_rows_s

case_h_pred_s
    ; Every output row is its left neighbour splatted across 16 bytes.
    sub             r0, r0, #1              ; r0 -> left neighbour of row 0
    mov             r12, #4                 ; 4 iterations x 4 rows

case_h_pred_loop_s
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6

    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    subs            r12, r12, #1
    bne             case_h_pred_loop_s

    vpop            {d8-d15}
    pop             {r4-r8, pc}

case_tm_pred_s
    ; pred[r][c] = clamp(left[r] + above[c] - top_left, 0, 255)

    ; Load yabove_row and ytop_left
    sub             r3, r0, r2
    vld1.8          {q8}, [r3]
    ldrb            r7, [r3, #-1]

    ; q4/q5 = above[0..7] / above[8..15] minus top_left, as signed
    ; 16-bit lanes. These stay constant for all 16 rows.
    vdup.u8         d0, r7
    vsubl.u8        q4, d16, d0
    vsubl.u8        q5, d17, d0

    sub             r0, r0, #1              ; r0 -> left neighbour of row 0
    mov             r12, #4                 ; 4 iterations x 4 rows

case_tm_pred_loop_s
    ; Load 4x yleft_col and splat each into 16-bit lanes
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    vdup.u16        q0, r3
    vdup.u16        q1, r4
    vdup.u16        q2, r5
    vdup.u16        q3, r6

    ; left + (above - top_left) for both halves of each row
    vqadd.s16       q8, q0, q4
    vqadd.s16       q9, q0, q5

    vqadd.s16       q10, q1, q4
    vqadd.s16       q11, q1, q5

    vqadd.s16       q12, q2, q4
    vqadd.s16       q13, q2, q5

    vqadd.s16       q14, q3, q4
    vqadd.s16       q15, q3, q5

    ; Saturating narrow to unsigned bytes performs the 0..255 clamp
    vqmovun.s16     d0, q8
    vqmovun.s16     d1, q9

    vqmovun.s16     d2, q10
    vqmovun.s16     d3, q11

    vqmovun.s16     d4, q12
    vqmovun.s16     d5, q13

    vqmovun.s16     d6, q14
    vqmovun.s16     d7, q15

    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    subs            r12, r12, #1
    bne             case_tm_pred_loop_s

    vpop            {d8-d15}
    pop             {r4-r8, pc}

    ENDP
593
594
595 END
OLDNEW
« no previous file with comments | « source/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c ('k') | source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698