source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.asm - Issue 1302353004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.asm

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 ;	1 ;

2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 ; Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 ;	3 ;

4 ; Use of this source code is governed by a BSD-style license and patent	4 ; Use of this source code is governed by a BSD-style license and patent

5 ; grant that can be found in the LICENSE file in the root of the source	5 ; grant that can be found in the LICENSE file in the root of the source

6 ; tree. All contributing project authors may be found in the AUTHORS	6 ; tree. All contributing project authors may be found in the AUTHORS

7 ; file in the root of the source tree.	7 ; file in the root of the source tree.

8 ;	8 ;

9	9

10 EXPORT |vp9_idct32x32_1_add_neon|	10 EXPORT |vpx_idct32x32_1_add_neon|

11 ARM	11 ARM

12 REQUIRE8	12 REQUIRE8

13 PRESERVE8	13 PRESERVE8

14	14

15 AREA ||.text||, CODE, READONLY, ALIGN=2	15 AREA ||.text||, CODE, READONLY, ALIGN=2

16	16

17 ;TODO(hkuang): put the following macros in a separate	17 ;TODO(hkuang): put the following macros in a separate

18 ;file so other idct functions can also use them.	18 ;file so other idct functions can also use them.

19 MACRO	19 MACRO

20 LD_16x8 $src, $stride	20 LD_16x8 $src, $stride

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
57 vst1.8 {q8}, [$dst], $stride	57 vst1.8 {q8}, [$dst], $stride

58 vst1.8 {q9}, [$dst], $stride	58 vst1.8 {q9}, [$dst], $stride

59 vst1.8 {q10},[$dst], $stride	59 vst1.8 {q10},[$dst], $stride

60 vst1.8 {q11},[$dst], $stride	60 vst1.8 {q11},[$dst], $stride

61 vst1.8 {q12},[$dst], $stride	61 vst1.8 {q12},[$dst], $stride

62 vst1.8 {q13},[$dst], $stride	62 vst1.8 {q13},[$dst], $stride

63 vst1.8 {q14},[$dst], $stride	63 vst1.8 {q14},[$dst], $stride

64 vst1.8 {q15},[$dst], $stride	64 vst1.8 {q15},[$dst], $stride

65 MEND	65 MEND

66	66

67 ;void vp9_idct32x32_1_add_neon(int16_t *input, uint8_t *dest,	67 ;void vpx_idct32x32_1_add_neon(int16_t *input, uint8_t *dest,

68 ; int dest_stride)	68 ; int dest_stride)

69 ;	69 ;

70 ; r0 int16_t *input	70 ; r0 int16_t *input

71 ; r1 uint8_t *dest	71 ; r1 uint8_t *dest

72 ; r2 int dest_stride	72 ; r2 int dest_stride

73	73

74 |vp9_idct32x32_1_add_neon| PROC	74 |vpx_idct32x32_1_add_neon| PROC

75 push {lr}	75 push {lr}

76 pld [r1]	76 pld [r1]

77 add r3, r1, #16 ; r3 dest + 16 for second loop	77 add r3, r1, #16 ; r3 dest + 16 for second loop

78 ldrsh r0, [r0]	78 ldrsh r0, [r0]

79	79

80 ; generate cospi_16_64 = 11585	80 ; generate cospi_16_64 = 11585

81 mov r12, #0x2d00	81 mov r12, #0x2d00

82 add r12, #0x41	82 add r12, #0x41

83	83

84 ; out = dct_const_round_shift(input[0] * cospi_16_64)	84 ; out = dct_const_round_shift(input[0] * cospi_16_64)

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
133 LD_16x8 r1, r2	133 LD_16x8 r1, r2

134 ADD_DIFF_16x8 q0	134 ADD_DIFF_16x8 q0

135 ST_16x8 r12, r2	135 ST_16x8 r12, r2

136 cmp r0, #2	136 cmp r0, #2

137 moveq r1, r3	137 moveq r1, r3

138 moveq r12, r3	138 moveq r12, r3

139 cmp r0, #0	139 cmp r0, #0

140 bne diff_positive_32_32_loop	140 bne diff_positive_32_32_loop

141 pop {pc}	141 pop {pc}

142	142

143 ENDP ; |vp9_idct32x32_1_add_neon|	143 ENDP ; |vpx_idct32x32_1_add_neon|

144 END	144 END

OLD	NEW

« no previous file with comments | « source/libvpx/vpx_dsp/arm/idct16x16_neon.c ('k') | source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c » ('j') | no next file with comments »