OLD | NEW |
| (Empty) |
1 ; | |
2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. | |
3 ; | |
4 ; Use of this source code is governed by a BSD-style license | |
5 ; that can be found in the LICENSE file in the root of the source | |
6 ; tree. An additional intellectual property rights grant can be found | |
7 ; in the file PATENTS. All contributing project authors may | |
8 ; be found in the AUTHORS file in the root of the source tree. | |
9 ; | |
10 | |
; ARM NEON (armasm syntax) routine that overwrites each byte of one image
; with its rounding average against the matching byte of a second image.

    EXPORT  |vp9_convolve_avg_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp9_convolve_avg_neon
;
; For every byte position computes (a + b + 1) >> 1 (vrhadd.u8), where
; a is read through r0 and b is read through r2, and stores the result
; back through r2.
;
; In:    r0            = pointer to the read-only input rows
;        r1            = byte pitch between rows of the r0 buffer
;        r2            = pointer to the rows that are read and overwritten
;        r3            = byte pitch between rows of the r2 buffer
;        [sp, #16]     = width selector   (offset as seen on entry)
;        [sp, #20]     = row count        (offset as seen on entry)
;        The four stack words below those two are never read.
;        NOTE(review): this matches a ten-argument prototype of the form
;        (src, src_stride, dst, dst_stride, filter_x, x_step, filter_y,
;        y_step, w, h) -- confirm against the C declaration.
;
; Width dispatch (signed compares):
;        > 32 -> 64-byte rows      == 32 -> 32-byte rows
;        9..31 -> 16-byte rows     == 8  -> 8-byte rows
;        anything else -> 4-byte rows
;
; Alignment: every access through r2/r6 carries an alignment qualifier,
;        so the r2 rows must be 16-byte aligned for the 64/32/16 paths,
;        8-byte aligned for the 8 path and 4-byte aligned for the 4 path.
;        Accesses through r0 carry no qualifier.
;
; Rows:  all loops are bottom-tested, so at least one iteration always
;        runs. The 64 path handles one row per iteration; every other
;        path handles two, so an odd row count touches one extra row.
;
; Clobb: r0, r2, flags, q0-q3, q8-q11.  r4-r6 and lr are saved on entry
;        and restored on exit (lr is popped straight into pc).
;-----------------------------------------------------------------------
|vp9_convolve_avg_neon| PROC
    push                {r4-r6, lr}
    ldrd                r4, r5, [sp, #32]       ; r4 = width, r5 = rows; #32 = entry offset 16 + 16 bytes just pushed
    mov                 r6, r2                  ; r6 walks the r2 buffer for reads, r2 itself is used for the stores

    ; Select the loop that matches the width in r4.
    cmp                 r4, #32
    bgt                 cavg_w64_setup
    beq                 cavg_w32
    cmp                 r4, #8
    bgt                 cavg_w16
    beq                 cavg_w8
    b                   cavg_w4

    ; ---- 64 bytes per row, one row per iteration --------------------
cavg_w64_setup
    ; Each row is moved as two 32-byte halves. The first half advances
    ; its pointer by 32 through '!', so stepping the second half by
    ; (pitch - 32) leaves the pointer on the start of the next row.
    ; r4 (the width) is no longer needed and is reused for that step.
    sub                 lr, r1, #32
    sub                 r4, r3, #32
cavg_w64_row
    pld                 [r0, r1, lsl #1]        ; r0 buffer, two rows ahead
    vld1.8              {q0-q1}, [r0]!
    vld1.8              {q2-q3}, [r0], lr
    pld                 [r2, r3]                ; r2 has not advanced yet: this is the next row of the r2 buffer
    vld1.8              {q8-q9},   [r6@128]!
    vld1.8              {q10-q11}, [r6@128], r4
    vrhadd.u8           q0, q0, q8
    vrhadd.u8           q1, q1, q9
    vrhadd.u8           q2, q2, q10
    vrhadd.u8           q3, q3, q11
    vst1.8              {q0-q1}, [r2@128]!
    vst1.8              {q2-q3}, [r2@128], r4
    subs                r5, r5, #1
    bgt                 cavg_w64_row
    pop                 {r4-r6, pc}

    ; ---- 32 bytes per row, two rows per iteration -------------------
cavg_w32
    vld1.8              {q0-q1}, [r0], r1
    vld1.8              {q2-q3}, [r0], r1
    vld1.8              {q8-q9},   [r6@128], r3
    vld1.8              {q10-q11}, [r6@128], r3
    ; r0 and r6 now point at the next row pair; the prefetches for it
    ; are interleaved with the arithmetic on the current pair.
    pld                 [r0]
    vrhadd.u8           q0, q0, q8
    pld                 [r0, r1]
    vrhadd.u8           q1, q1, q9
    pld                 [r6]
    vrhadd.u8           q2, q2, q10
    pld                 [r6, r3]
    vrhadd.u8           q3, q3, q11
    vst1.8              {q0-q1}, [r2@128], r3
    vst1.8              {q2-q3}, [r2@128], r3
    subs                r5, r5, #2
    bgt                 cavg_w32
    pop                 {r4-r6, pc}

    ; ---- 16 bytes per row, two rows per iteration -------------------
cavg_w16
    vld1.8              {q0}, [r0], r1
    vld1.8              {q1}, [r0], r1
    vld1.8              {q2}, [r6@128], r3
    vld1.8              {q3}, [r6@128], r3
    pld                 [r0]                    ; next row pair of the r0 buffer
    pld                 [r0, r1]
    vrhadd.u8           q0, q0, q2
    pld                 [r6]                    ; next row pair of the r2 buffer
    pld                 [r6, r3]
    vrhadd.u8           q1, q1, q3
    vst1.8              {q0}, [r2@128], r3
    vst1.8              {q1}, [r2@128], r3
    subs                r5, r5, #2
    bgt                 cavg_w16
    pop                 {r4-r6, pc}

    ; ---- 8 bytes per row, two rows per iteration --------------------
cavg_w8
    ; Two rows are packed into one q register per buffer (d0:d1 = q0,
    ; d2:d3 = q1) so a single vrhadd covers both rows.
    vld1.8              {d0}, [r0], r1
    vld1.8              {d1}, [r0], r1
    vld1.8              {d2}, [r6@64], r3
    vld1.8              {d3}, [r6@64], r3
    pld                 [r0]
    pld                 [r0, r1]
    vrhadd.u8           q0, q0, q1
    pld                 [r6]
    pld                 [r6, r3]
    vst1.8              {d0}, [r2@64], r3
    vst1.8              {d1}, [r2@64], r3
    subs                r5, r5, #2
    bgt                 cavg_w8
    pop                 {r4-r6, pc}

    ; ---- 4 bytes per row, two rows per iteration --------------------
cavg_w4
    ; Each row is one 32-bit lane; both rows of a buffer share one
    ; d register, so a single vrhadd covers both rows.
    vld1.32             {d0[0]}, [r0], r1
    vld1.32             {d0[1]}, [r0], r1
    vld1.32             {d2[0]}, [r6@32], r3
    vld1.32             {d2[1]}, [r6@32], r3
    vrhadd.u8           d0, d0, d2
    vst1.32             {d0[0]}, [r2@32], r3
    vst1.32             {d0[1]}, [r2@32], r3
    subs                r5, r5, #2
    bgt                 cavg_w4
    pop                 {r4-r6, pc}
    ENDP

    END
OLD | NEW |