source/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.asm - Issue 1302353004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.asm

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 ;	1 ;

2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 ;	3 ;

4 ; Use of this source code is governed by a BSD-style license	4 ; Use of this source code is governed by a BSD-style license

5 ; that can be found in the LICENSE file in the root of the source	5 ; that can be found in the LICENSE file in the root of the source

6 ; tree. An additional intellectual property rights grant can be found	6 ; tree. An additional intellectual property rights grant can be found

7 ; in the file PATENTS. All contributing project authors may	7 ; in the file PATENTS. All contributing project authors may

8 ; be found in the AUTHORS file in the root of the source tree.	8 ; be found in the AUTHORS file in the root of the source tree.

9 ;	9 ;

10	10

11	11

12 ; These functions are only valid when:	12 ; These functions are only valid when:

13 ; x_step_q4 == 16	13 ; x_step_q4 == 16

14 ; w%4 == 0	14 ; w%4 == 0

15 ; h%4 == 0	15 ; h%4 == 0

16 ; taps == 8	16 ; taps == 8

17 ; VP9_FILTER_WEIGHT == 128	17 ; VP9_FILTER_WEIGHT == 128

18 ; VP9_FILTER_SHIFT == 7	18 ; VP9_FILTER_SHIFT == 7

19	19

20 EXPORT |vpx_convolve8_horiz_neon|	20 EXPORT |vpx_convolve8_horiz_neon|

21 EXPORT |vpx_convolve8_vert_neon|	21 EXPORT |vpx_convolve8_vert_neon|

22 IMPORT |vpx_convolve8_horiz_c|

23 IMPORT |vpx_convolve8_vert_c|

24 ARM	22 ARM

25 REQUIRE8	23 REQUIRE8

26 PRESERVE8	24 PRESERVE8

27	25

28 AREA ||.text||, CODE, READONLY, ALIGN=2	26 AREA ||.text||, CODE, READONLY, ALIGN=2

29	27

30 ; Multiply and accumulate by q0	28 ; Multiply and accumulate by q0

31 MACRO	29 MACRO

32 MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7	30 MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7

33 vmull.s16 $dst, $src0, d0[0]	31 vmull.s16 $dst, $src0, d0[0]

(...skipping 11 matching lines...) Expand all Loading...
45 ; r2 uint8_t *dst	43 ; r2 uint8_t *dst

46 ; r3 int dst_stride	44 ; r3 int dst_stride

47 ; sp[]const int16_t *filter_x	45 ; sp[]const int16_t *filter_x

48 ; sp[]int x_step_q4	46 ; sp[]int x_step_q4

49 ; sp[]const int16_t *filter_y ; unused	47 ; sp[]const int16_t *filter_y ; unused

50 ; sp[]int y_step_q4 ; unused	48 ; sp[]int y_step_q4 ; unused

51 ; sp[]int w	49 ; sp[]int w

52 ; sp[]int h	50 ; sp[]int h

53	51

54 |vpx_convolve8_horiz_neon| PROC	52 |vpx_convolve8_horiz_neon| PROC

55 ldr r12, [sp, #4] ; x_step_q4

56 cmp r12, #16

57 bne vpx_convolve8_horiz_c

58

59 push {r4-r10, lr}	53 push {r4-r10, lr}

60	54

61 sub r0, r0, #3 ; adjust for taps	55 sub r0, r0, #3 ; adjust for taps

62	56

63 ldr r5, [sp, #32] ; filter_x	57 ldr r5, [sp, #32] ; filter_x

64 ldr r6, [sp, #48] ; w	58 ldr r6, [sp, #48] ; w

65 ldr r7, [sp, #52] ; h	59 ldr r7, [sp, #52] ; h

66	60

67 vld1.s16 {q0}, [r5] ; filter_x	61 vld1.s16 {q0}, [r5] ; filter_x

68	62

(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
166 add r0, r0, r9 ; src += src_stride * 4 - w	160 add r0, r0, r9 ; src += src_stride * 4 - w

167 add r2, r2, r12 ; dst += dst_stride * 4 - w	161 add r2, r2, r12 ; dst += dst_stride * 4 - w

168 subs r7, r7, #4 ; h -= 4	162 subs r7, r7, #4 ; h -= 4

169 bgt vpx_convolve8_loop_horiz_v	163 bgt vpx_convolve8_loop_horiz_v

170	164

171 pop {r4-r10, pc}	165 pop {r4-r10, pc}

172	166

173 ENDP	167 ENDP

174	168

175 |vpx_convolve8_vert_neon| PROC	169 |vpx_convolve8_vert_neon| PROC

176 ldr r12, [sp, #12]

177 cmp r12, #16

178 bne vpx_convolve8_vert_c

179

180 push {r4-r8, lr}	170 push {r4-r8, lr}

181	171

182 ; adjust for taps	172 ; adjust for taps

183 sub r0, r0, r1	173 sub r0, r0, r1

184 sub r0, r0, r1, lsl #1	174 sub r0, r0, r1, lsl #1

185	175

186 ldr r4, [sp, #32] ; filter_y	176 ldr r4, [sp, #32] ; filter_y

187 ldr r6, [sp, #40] ; w	177 ldr r6, [sp, #40] ; w

188 ldr lr, [sp, #44] ; h	178 ldr lr, [sp, #44] ; h

189	179

(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
271 ; outer loop	261 ; outer loop

272 add r0, r0, #4	262 add r0, r0, #4

273 add r2, r2, #4	263 add r2, r2, #4

274 subs r6, r6, #4 ; w -= 4	264 subs r6, r6, #4 ; w -= 4

275 bgt vpx_convolve8_loop_vert_h	265 bgt vpx_convolve8_loop_vert_h

276	266

277 pop {r4-r8, pc}	267 pop {r4-r8, pc}

278	268

279 ENDP	269 ENDP

280 END	270 END

OLD	NEW

« no previous file with comments | « source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c ('k') | source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c » ('j') | no next file with comments »