Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(439)

Side by Side Diff: source/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.asm

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 ; 1 ;
2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 11
12 ; These functions are only valid when: 12 ; These functions are only valid when:
13 ; x_step_q4 == 16 13 ; x_step_q4 == 16
14 ; w%4 == 0 14 ; w%4 == 0
15 ; h%4 == 0 15 ; h%4 == 0
16 ; taps == 8 16 ; taps == 8
17 ; VP9_FILTER_WEIGHT == 128 17 ; VP9_FILTER_WEIGHT == 128
18 ; VP9_FILTER_SHIFT == 7 18 ; VP9_FILTER_SHIFT == 7
19 19
20 EXPORT |vpx_convolve8_horiz_neon| 20 EXPORT |vpx_convolve8_horiz_neon|
21 EXPORT |vpx_convolve8_vert_neon| 21 EXPORT |vpx_convolve8_vert_neon|
22 IMPORT |vpx_convolve8_horiz_c|
23 IMPORT |vpx_convolve8_vert_c|
24 ARM 22 ARM
25 REQUIRE8 23 REQUIRE8
26 PRESERVE8 24 PRESERVE8
27 25
28 AREA ||.text||, CODE, READONLY, ALIGN=2 26 AREA ||.text||, CODE, READONLY, ALIGN=2
29 27
30 ; Multiply and accumulate by q0 28 ; Multiply and accumulate by q0
31 MACRO 29 MACRO
32 MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7 30 MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7
33 vmull.s16 $dst, $src0, d0[0] 31 vmull.s16 $dst, $src0, d0[0]
(...skipping 11 matching lines...) Expand all
45 ; r2 uint8_t *dst 43 ; r2 uint8_t *dst
46 ; r3 int dst_stride 44 ; r3 int dst_stride
47 ; sp[]const int16_t *filter_x 45 ; sp[]const int16_t *filter_x
48 ; sp[]int x_step_q4 46 ; sp[]int x_step_q4
49 ; sp[]const int16_t *filter_y ; unused 47 ; sp[]const int16_t *filter_y ; unused
50 ; sp[]int y_step_q4 ; unused 48 ; sp[]int y_step_q4 ; unused
51 ; sp[]int w 49 ; sp[]int w
52 ; sp[]int h 50 ; sp[]int h
53 51
54 |vpx_convolve8_horiz_neon| PROC 52 |vpx_convolve8_horiz_neon| PROC
55 ldr r12, [sp, #4] ; x_step_q4
56 cmp r12, #16
57 bne vpx_convolve8_horiz_c
58
59 push {r4-r10, lr} 53 push {r4-r10, lr}
60 54
61 sub r0, r0, #3 ; adjust for taps 55 sub r0, r0, #3 ; adjust for taps
62 56
63 ldr r5, [sp, #32] ; filter_x 57 ldr r5, [sp, #32] ; filter_x
64 ldr r6, [sp, #48] ; w 58 ldr r6, [sp, #48] ; w
65 ldr r7, [sp, #52] ; h 59 ldr r7, [sp, #52] ; h
66 60
67 vld1.s16 {q0}, [r5] ; filter_x 61 vld1.s16 {q0}, [r5] ; filter_x
68 62
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
166 add r0, r0, r9 ; src += src_stride * 4 - w 160 add r0, r0, r9 ; src += src_stride * 4 - w
167 add r2, r2, r12 ; dst += dst_stride * 4 - w 161 add r2, r2, r12 ; dst += dst_stride * 4 - w
168 subs r7, r7, #4 ; h -= 4 162 subs r7, r7, #4 ; h -= 4
169 bgt vpx_convolve8_loop_horiz_v 163 bgt vpx_convolve8_loop_horiz_v
170 164
171 pop {r4-r10, pc} 165 pop {r4-r10, pc}
172 166
173 ENDP 167 ENDP
174 168
175 |vpx_convolve8_vert_neon| PROC 169 |vpx_convolve8_vert_neon| PROC
176 ldr r12, [sp, #12]
177 cmp r12, #16
178 bne vpx_convolve8_vert_c
179
180 push {r4-r8, lr} 170 push {r4-r8, lr}
181 171
182 ; adjust for taps 172 ; adjust for taps
183 sub r0, r0, r1 173 sub r0, r0, r1
184 sub r0, r0, r1, lsl #1 174 sub r0, r0, r1, lsl #1
185 175
186 ldr r4, [sp, #32] ; filter_y 176 ldr r4, [sp, #32] ; filter_y
187 ldr r6, [sp, #40] ; w 177 ldr r6, [sp, #40] ; w
188 ldr lr, [sp, #44] ; h 178 ldr lr, [sp, #44] ; h
189 179
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
271 ; outer loop 261 ; outer loop
272 add r0, r0, #4 262 add r0, r0, #4
273 add r2, r2, #4 263 add r2, r2, #4
274 subs r6, r6, #4 ; w -= 4 264 subs r6, r6, #4 ; w -= 4
275 bgt vpx_convolve8_loop_vert_h 265 bgt vpx_convolve8_loop_vert_h
276 266
277 pop {r4-r8, pc} 267 pop {r4-r8, pc}
278 268
279 ENDP 269 ENDP
280 END 270 END
OLD | NEW
« no previous file with comments | « source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c ('k') | source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698