| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 | 11 |
| 12 ; These functions are only valid when: | 12 ; These functions are only valid when: |
| 13 ; x_step_q4 == 16 | 13 ; x_step_q4 == 16 |
| 14 ; w%4 == 0 | 14 ; w%4 == 0 |
| 15 ; h%4 == 0 | 15 ; h%4 == 0 |
| 16 ; taps == 8 | 16 ; taps == 8 |
| 17 ; VP9_FILTER_WEIGHT == 128 | 17 ; VP9_FILTER_WEIGHT == 128 |
| 18 ; VP9_FILTER_SHIFT == 7 | 18 ; VP9_FILTER_SHIFT == 7 |
| 19 | 19 |
| 20 EXPORT |vpx_convolve8_avg_horiz_neon| | 20 EXPORT |vpx_convolve8_avg_horiz_neon| |
| 21 EXPORT |vpx_convolve8_avg_vert_neon| | 21 EXPORT |vpx_convolve8_avg_vert_neon| |
| 22 IMPORT |vpx_convolve8_avg_horiz_c| | |
| 23 IMPORT |vpx_convolve8_avg_vert_c| | |
| 24 ARM | 22 ARM |
| 25 REQUIRE8 | 23 REQUIRE8 |
| 26 PRESERVE8 | 24 PRESERVE8 |
| 27 | 25 |
| 28 AREA ||.text||, CODE, READONLY, ALIGN=2 | 26 AREA ||.text||, CODE, READONLY, ALIGN=2 |
| 29 | 27 |
| 30 ; Multiply and accumulate by q0 | 28 ; Multiply and accumulate by q0 |
| 31 MACRO | 29 MACRO |
| 32 MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7 | 30 MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7 |
| 33 vmull.s16 $dst, $src0, d0[0] | 31 vmull.s16 $dst, $src0, d0[0] |
| (...skipping 11 matching lines...) Expand all Loading... |
| 45 ; r2 uint8_t *dst | 43 ; r2 uint8_t *dst |
| 46 ; r3 int dst_stride | 44 ; r3 int dst_stride |
| 47 ; sp[0] const int16_t *filter_x | 45 ; sp[0] const int16_t *filter_x |
| 48 ; sp[4] int x_step_q4 | 46 ; sp[4] int x_step_q4 |
| 49 ; sp[8] const int16_t *filter_y ; unused | 47 ; sp[8] const int16_t *filter_y ; unused |
| 50 ; sp[12] int y_step_q4 ; unused | 48 ; sp[12] int y_step_q4 ; unused |
| 51 ; sp[16] int w | 49 ; sp[16] int w |
| 52 ; sp[20] int h | 50 ; sp[20] int h |
| 53 | 51 |
| 54 |vpx_convolve8_avg_horiz_neon| PROC | 52 |vpx_convolve8_avg_horiz_neon| PROC |
| 55 ldr r12, [sp, #4] ; x_step_q4 | |
| 56 cmp r12, #16 | |
| 57 bne vpx_convolve8_avg_horiz_c | |
| 58 | |
| 59 push {r4-r10, lr} | 53 push {r4-r10, lr} |
| 60 | 54 |
| 61 sub r0, r0, #3 ; adjust for taps | 55 sub r0, r0, #3 ; adjust for taps |
| 62 | 56 |
| 63 ldr r5, [sp, #32] ; filter_x | 57 ldr r5, [sp, #32] ; filter_x |
| 64 ldr r6, [sp, #48] ; w | 58 ldr r6, [sp, #48] ; w |
| 65 ldr r7, [sp, #52] ; h | 59 ldr r7, [sp, #52] ; h |
| 66 | 60 |
| 67 vld1.s16 {q0}, [r5] ; filter_x | 61 vld1.s16 {q0}, [r5] ; filter_x |
| 68 | 62 |
| (...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 177 add r0, r0, r9 ; src += src_stride * 4 - w | 171 add r0, r0, r9 ; src += src_stride * 4 - w |
| 178 add r2, r2, r12 ; dst += dst_stride * 4 - w | 172 add r2, r2, r12 ; dst += dst_stride * 4 - w |
| 179 subs r7, r7, #4 ; h -= 4 | 173 subs r7, r7, #4 ; h -= 4 |
| 180 bgt vpx_convolve8_avg_loop_horiz_v | 174 bgt vpx_convolve8_avg_loop_horiz_v |
| 181 | 175 |
| 182 pop {r4-r10, pc} | 176 pop {r4-r10, pc} |
| 183 | 177 |
| 184 ENDP | 178 ENDP |
| 185 | 179 |
| 186 |vpx_convolve8_avg_vert_neon| PROC | 180 |vpx_convolve8_avg_vert_neon| PROC |
| 187 ldr r12, [sp, #12] | |
| 188 cmp r12, #16 | |
| 189 bne vpx_convolve8_avg_vert_c | |
| 190 | |
| 191 push {r4-r8, lr} | 181 push {r4-r8, lr} |
| 192 | 182 |
| 193 ; adjust for taps | 183 ; adjust for taps |
| 194 sub r0, r0, r1 | 184 sub r0, r0, r1 |
| 195 sub r0, r0, r1, lsl #1 | 185 sub r0, r0, r1, lsl #1 |
| 196 | 186 |
| 197 ldr r4, [sp, #32] ; filter_y | 187 ldr r4, [sp, #32] ; filter_y |
| 198 ldr r6, [sp, #40] ; w | 188 ldr r6, [sp, #40] ; w |
| 199 ldr lr, [sp, #44] ; h | 189 ldr lr, [sp, #44] ; h |
| 200 | 190 |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 293 ; outer loop | 283 ; outer loop |
| 294 add r0, r0, #4 | 284 add r0, r0, #4 |
| 295 add r2, r2, #4 | 285 add r2, r2, #4 |
| 296 subs r6, r6, #4 ; w -= 4 | 286 subs r6, r6, #4 ; w -= 4 |
| 297 bgt vpx_convolve8_avg_loop_vert_h | 287 bgt vpx_convolve8_avg_loop_vert_h |
| 298 | 288 |
| 299 pop {r4-r8, pc} | 289 pop {r4-r8, pc} |
| 300 | 290 |
| 301 ENDP | 291 ENDP |
| 302 END | 292 END |
| OLD | NEW |