OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
11 | 11 |
12 ; These functions are only valid when: | 12 ; These functions are only valid when: |
13 ; x_step_q4 == 16 | 13 ; x_step_q4 == 16 |
14 ; w%4 == 0 | 14 ; w%4 == 0 |
15 ; h%4 == 0 | 15 ; h%4 == 0 |
16 ; taps == 8 | 16 ; taps == 8 |
17 ; VP9_FILTER_WEIGHT == 128 | 17 ; VP9_FILTER_WEIGHT == 128 |
18 ; VP9_FILTER_SHIFT == 7 | 18 ; VP9_FILTER_SHIFT == 7 |
19 | 19 |
20 EXPORT |vpx_convolve8_horiz_neon| | 20 EXPORT |vpx_convolve8_horiz_neon| |
21 EXPORT |vpx_convolve8_vert_neon| | 21 EXPORT |vpx_convolve8_vert_neon| |
22 IMPORT |vpx_convolve8_horiz_c| | |
23 IMPORT |vpx_convolve8_vert_c| | |
24 ARM | 22 ARM |
25 REQUIRE8 | 23 REQUIRE8 |
26 PRESERVE8 | 24 PRESERVE8 |
27 | 25 |
28 AREA ||.text||, CODE, READONLY, ALIGN=2 | 26 AREA ||.text||, CODE, READONLY, ALIGN=2 |
29 | 27 |
30 ; Multiply and accumulate by q0 | 28 ; Multiply and accumulate by q0 |
31 MACRO | 29 MACRO |
32 MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7 | 30 MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7 |
33 vmull.s16 $dst, $src0, d0[0] | 31 vmull.s16 $dst, $src0, d0[0] |
(...skipping 11 matching lines...) Expand all Loading... |
45 ; r2 uint8_t *dst | 43 ; r2 uint8_t *dst |
46 ; r3 int dst_stride | 44 ; r3 int dst_stride |
47 ; sp[] const int16_t *filter_x | 45 ; sp[] const int16_t *filter_x |
48 ; sp[] int x_step_q4 | 46 ; sp[] int x_step_q4 |
49 ; sp[] const int16_t *filter_y ; unused | 47 ; sp[] const int16_t *filter_y ; unused |
50 ; sp[] int y_step_q4 ; unused | 48 ; sp[] int y_step_q4 ; unused |
51 ; sp[] int w | 49 ; sp[] int w |
52 ; sp[] int h | 50 ; sp[] int h |
53 | 51 |
54 |vpx_convolve8_horiz_neon| PROC | 52 |vpx_convolve8_horiz_neon| PROC |
55 ldr r12, [sp, #4] ; x_step_q4 | |
56 cmp r12, #16 | |
57 bne vpx_convolve8_horiz_c | |
58 | |
59 push {r4-r10, lr} | 53 push {r4-r10, lr} |
60 | 54 |
61 sub r0, r0, #3 ; adjust for taps | 55 sub r0, r0, #3 ; adjust for taps |
62 | 56 |
63 ldr r5, [sp, #32] ; filter_x | 57 ldr r5, [sp, #32] ; filter_x |
64 ldr r6, [sp, #48] ; w | 58 ldr r6, [sp, #48] ; w |
65 ldr r7, [sp, #52] ; h | 59 ldr r7, [sp, #52] ; h |
66 | 60 |
67 vld1.s16 {q0}, [r5] ; filter_x | 61 vld1.s16 {q0}, [r5] ; filter_x |
68 | 62 |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
166 add r0, r0, r9 ; src += src_stride * 4 - w | 160 add r0, r0, r9 ; src += src_stride * 4 - w |
167 add r2, r2, r12 ; dst += dst_stride * 4 - w | 161 add r2, r2, r12 ; dst += dst_stride * 4 - w |
168 subs r7, r7, #4 ; h -= 4 | 162 subs r7, r7, #4 ; h -= 4 |
169 bgt vpx_convolve8_loop_horiz_v | 163 bgt vpx_convolve8_loop_horiz_v |
170 | 164 |
171 pop {r4-r10, pc} | 165 pop {r4-r10, pc} |
172 | 166 |
173 ENDP | 167 ENDP |
174 | 168 |
175 |vpx_convolve8_vert_neon| PROC | 169 |vpx_convolve8_vert_neon| PROC |
176 ldr r12, [sp, #12] | |
177 cmp r12, #16 | |
178 bne vpx_convolve8_vert_c | |
179 | |
180 push {r4-r8, lr} | 170 push {r4-r8, lr} |
181 | 171 |
182 ; adjust for taps | 172 ; adjust for taps |
183 sub r0, r0, r1 | 173 sub r0, r0, r1 |
184 sub r0, r0, r1, lsl #1 | 174 sub r0, r0, r1, lsl #1 |
185 | 175 |
186 ldr r4, [sp, #32] ; filter_y | 176 ldr r4, [sp, #32] ; filter_y |
187 ldr r6, [sp, #40] ; w | 177 ldr r6, [sp, #40] ; w |
188 ldr lr, [sp, #44] ; h | 178 ldr lr, [sp, #44] ; h |
189 | 179 |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
271 ; outer loop | 261 ; outer loop |
272 add r0, r0, #4 | 262 add r0, r0, #4 |
273 add r2, r2, #4 | 263 add r2, r2, #4 |
274 subs r6, r6, #4 ; w -= 4 | 264 subs r6, r6, #4 ; w -= 4 |
275 bgt vpx_convolve8_loop_vert_h | 265 bgt vpx_convolve8_loop_vert_h |
276 | 266 |
277 pop {r4-r8, pc} | 267 pop {r4-r8, pc} |
278 | 268 |
279 ENDP | 269 ENDP |
280 END | 270 END |
OLD | NEW |