| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 | 11 |
| 12 ; These functions are only valid when: | 12 ; These functions are only valid when: |
| 13 ; x_step_q4 == 16 | 13 ; x_step_q4 == 16 |
| 14 ; w%4 == 0 | 14 ; w%4 == 0 |
| 15 ; h%4 == 0 | 15 ; h%4 == 0 |
| 16 ; taps == 8 | 16 ; taps == 8 |
| 17 ; VP9_FILTER_WEIGHT == 128 | 17 ; VP9_FILTER_WEIGHT == 128 |
| 18 ; VP9_FILTER_SHIFT == 7 | 18 ; VP9_FILTER_SHIFT == 7 |
| 19 | 19 |
| 20 EXPORT |vpx_convolve8_horiz_neon| | 20 EXPORT |vpx_convolve8_horiz_neon| |
| 21 EXPORT |vpx_convolve8_vert_neon| | 21 EXPORT |vpx_convolve8_vert_neon| |
| 22 IMPORT |vpx_convolve8_horiz_c| | |
| 23 IMPORT |vpx_convolve8_vert_c| | |
| 24 ARM | 22 ARM |
| 25 REQUIRE8 | 23 REQUIRE8 |
| 26 PRESERVE8 | 24 PRESERVE8 |
| 27 | 25 |
| 28 AREA ||.text||, CODE, READONLY, ALIGN=2 | 26 AREA ||.text||, CODE, READONLY, ALIGN=2 |
| 29 | 27 |
| 30 ; Multiply and accumulate by q0 | 28 ; Multiply and accumulate by q0 |
| 31 MACRO | 29 MACRO |
| 32 MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7 | 30 MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7 |
| 33 vmull.s16 $dst, $src0, d0[0] | 31 vmull.s16 $dst, $src0, d0[0] |
| (...skipping 11 matching lines...) Expand all Loading... |
| 45 ; r2 uint8_t *dst | 43 ; r2 uint8_t *dst |
| 46 ; r3 int dst_stride | 44 ; r3 int dst_stride |
| 47 ; sp[]const int16_t *filter_x | 45 ; sp[]const int16_t *filter_x |
| 48 ; sp[]int x_step_q4 | 46 ; sp[]int x_step_q4 |
| 49 ; sp[]const int16_t *filter_y ; unused | 47 ; sp[]const int16_t *filter_y ; unused |
| 50 ; sp[]int y_step_q4 ; unused | 48 ; sp[]int y_step_q4 ; unused |
| 51 ; sp[]int w | 49 ; sp[]int w |
| 52 ; sp[]int h | 50 ; sp[]int h |
| 53 | 51 |
| 54 |vpx_convolve8_horiz_neon| PROC | 52 |vpx_convolve8_horiz_neon| PROC |
| 55 ldr r12, [sp, #4] ; x_step_q4 | |
| 56 cmp r12, #16 | |
| 57 bne vpx_convolve8_horiz_c | |
| 58 | |
| 59 push {r4-r10, lr} | 53 push {r4-r10, lr} |
| 60 | 54 |
| 61 sub r0, r0, #3 ; adjust for taps | 55 sub r0, r0, #3 ; adjust for taps |
| 62 | 56 |
| 63 ldr r5, [sp, #32] ; filter_x | 57 ldr r5, [sp, #32] ; filter_x |
| 64 ldr r6, [sp, #48] ; w | 58 ldr r6, [sp, #48] ; w |
| 65 ldr r7, [sp, #52] ; h | 59 ldr r7, [sp, #52] ; h |
| 66 | 60 |
| 67 vld1.s16 {q0}, [r5] ; filter_x | 61 vld1.s16 {q0}, [r5] ; filter_x |
| 68 | 62 |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 166 add r0, r0, r9 ; src += src_stride * 4 - w | 160 add r0, r0, r9 ; src += src_stride * 4 - w |
| 167 add r2, r2, r12 ; dst += dst_stride * 4 - w | 161 add r2, r2, r12 ; dst += dst_stride * 4 - w |
| 168 subs r7, r7, #4 ; h -= 4 | 162 subs r7, r7, #4 ; h -= 4 |
| 169 bgt vpx_convolve8_loop_horiz_v | 163 bgt vpx_convolve8_loop_horiz_v |
| 170 | 164 |
| 171 pop {r4-r10, pc} | 165 pop {r4-r10, pc} |
| 172 | 166 |
| 173 ENDP | 167 ENDP |
| 174 | 168 |
| 175 |vpx_convolve8_vert_neon| PROC | 169 |vpx_convolve8_vert_neon| PROC |
| 176 ldr r12, [sp, #12] | |
| 177 cmp r12, #16 | |
| 178 bne vpx_convolve8_vert_c | |
| 179 | |
| 180 push {r4-r8, lr} | 170 push {r4-r8, lr} |
| 181 | 171 |
| 182 ; adjust for taps | 172 ; adjust for taps |
| 183 sub r0, r0, r1 | 173 sub r0, r0, r1 |
| 184 sub r0, r0, r1, lsl #1 | 174 sub r0, r0, r1, lsl #1 |
| 185 | 175 |
| 186 ldr r4, [sp, #32] ; filter_y | 176 ldr r4, [sp, #32] ; filter_y |
| 187 ldr r6, [sp, #40] ; w | 177 ldr r6, [sp, #40] ; w |
| 188 ldr lr, [sp, #44] ; h | 178 ldr lr, [sp, #44] ; h |
| 189 | 179 |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 271 ; outer loop | 261 ; outer loop |
| 272 add r0, r0, #4 | 262 add r0, r0, #4 |
| 273 add r2, r2, #4 | 263 add r2, r2, #4 |
| 274 subs r6, r6, #4 ; w -= 4 | 264 subs r6, r6, #4 ; w -= 4 |
| 275 bgt vpx_convolve8_loop_vert_h | 265 bgt vpx_convolve8_loop_vert_h |
| 276 | 266 |
| 277 pop {r4-r8, pc} | 267 pop {r4-r8, pc} |
| 278 | 268 |
| 279 ENDP | 269 ENDP |
| 280 END | 270 END |
| OLD | NEW |