;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;

    EXPORT  |vp9_idct32x32_1_add_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;TODO(hkuang): put the following macros in a separate
;file so other idct functions can also use them.
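; LD_16x8: load eight rows of 16 bytes from $src into q8-q15, advancing
; $src by $stride after each row.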
    MACRO
    LD_16x8 $src, $stride
    vld1.8  {q8},  [$src], $stride
    vld1.8  {q9},  [$src], $stride
    vld1.8  {q10}, [$src], $stride
    vld1.8  {q11}, [$src], $stride
    vld1.8  {q12}, [$src], $stride
    vld1.8  {q13}, [$src], $stride
    vld1.8  {q14}, [$src], $stride
    vld1.8  {q15}, [$src], $stride
    MEND

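; ADD_DIFF_16x8: saturating add of the byte vector $diff to each of the
; eight rows held in q8-q15.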
    MACRO
    ADD_DIFF_16x8 $diff
    vqadd.u8 q8,  q8,  $diff
    vqadd.u8 q9,  q9,  $diff
    vqadd.u8 q10, q10, $diff
    vqadd.u8 q11, q11, $diff
    vqadd.u8 q12, q12, $diff
    vqadd.u8 q13, q13, $diff
    vqadd.u8 q14, q14, $diff
    vqadd.u8 q15, q15, $diff
    MEND

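; SUB_DIFF_16x8: saturating subtract of the byte vector $diff from each of
; the eight rows held in q8-q15.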
    MACRO
    SUB_DIFF_16x8 $diff
    vqsub.u8 q8,  q8,  $diff
    vqsub.u8 q9,  q9,  $diff
    vqsub.u8 q10, q10, $diff
    vqsub.u8 q11, q11, $diff
    vqsub.u8 q12, q12, $diff
    vqsub.u8 q13, q13, $diff
    vqsub.u8 q14, q14, $diff
    vqsub.u8 q15, q15, $diff
    MEND

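; ST_16x8: store the eight rows in q8-q15 to $dst, advancing $dst by
; $stride after each row.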
    MACRO
    ST_16x8 $dst, $stride
    vst1.8  {q8},  [$dst], $stride
    vst1.8  {q9},  [$dst], $stride
    vst1.8  {q10}, [$dst], $stride
    vst1.8  {q11}, [$dst], $stride
    vst1.8  {q12}, [$dst], $stride
    vst1.8  {q13}, [$dst], $stride
    vst1.8  {q14}, [$dst], $stride
    vst1.8  {q15}, [$dst], $stride
    MEND

;void vp9_idct32x32_1_add_neon(int16_t *input, uint8_t *dest,
;                              int dest_stride)
;
; r0  int16_t *input
; r1  uint8_t *dest
; r2  int dest_stride
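;
; For reference, the DC-only path below corresponds roughly to this C-style
; sketch (an illustrative equivalent, not copied from the C code; it assumes
; the usual libvpx helpers dct_const_round_shift, ROUND_POWER_OF_TWO and
; clip_pixel, with DCT_CONST_BITS == 14 and cospi_16_64 == 11585):
;
;   int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
;   out = dct_const_round_shift(out * cospi_16_64);
;   int a1 = ROUND_POWER_OF_TWO(out, 6);
;   for (j = 0; j < 32; ++j) {
;     for (i = 0; i < 32; ++i)
;       dest[i] = clip_pixel(dest[i] + a1);
;     dest += dest_stride;
;   }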

|vp9_idct32x32_1_add_neon| PROC
    push             {lr}
    pld              [r1]
    add              r3, r1, #16          ; r3 = dest + 16, used for the second half of the columns
    ldrsh            r0, [r0]

    ; generate cospi_16_64 = 11585
    mov              r12, #0x2d00
    add              r12, #0x41
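    ; 0x2d00 + 0x41 = 0x2d41 = 11585 = round(16384 * cos(16 * pi / 64));
    ; the constant is built in two steps because 0x2d41 does not fit a
    ; single ARM data-processing immediate.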

    ; out = dct_const_round_shift(input[0] * cospi_16_64)
    mul              r0, r0, r12          ; input[0] * cospi_16_64
    add              r0, r0, #0x2000      ; +(1 << ((DCT_CONST_BITS) - 1))
    asr              r0, r0, #14          ; >> DCT_CONST_BITS
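    ; DCT_CONST_BITS is 14, so dct_const_round_shift(x) is
    ; (x + 0x2000) >> 14, matching the add/asr pair above.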

    ; out = dct_const_round_shift(out * cospi_16_64)
    mul              r0, r0, r12          ; out * cospi_16_64
    mov              r12, r1              ; save dest
    add              r0, r0, #0x2000      ; +(1 << ((DCT_CONST_BITS) - 1))
    asr              r0, r0, #14          ; >> DCT_CONST_BITS

    ; a1 = ROUND_POWER_OF_TWO(out, 6)
    add              r0, r0, #32          ; + (1 << ((6) - 1))
    asrs             r0, r0, #6           ; >> 6
    bge              diff_positive_32_32
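
    ; asrs sets the flags from a1, so the bge above takes the positive
    ; path for a1 >= 0; a negative a1 falls through to the code below,
    ; which negates it, clamps it to 8 bits and applies it with a
    ; saturating subtract rather than a saturating add.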

diff_negative_32_32
    neg              r0, r0
    usat             r0, #8, r0
    vdup.u8          q0, r0
    mov              r0, #4

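; Four iterations of 16 rows each: the first two passes cover columns 0-15
; of the 32x32 block, then r1/r12 are switched to r3 (dest + 16) so the
; last two passes cover columns 16-31.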
diff_negative_32_32_loop
    sub              r0, #1
    LD_16x8          r1, r2
    SUB_DIFF_16x8    q0
    ST_16x8          r12, r2

    LD_16x8          r1, r2
    SUB_DIFF_16x8    q0
    ST_16x8          r12, r2
    cmp              r0, #2
    moveq            r1, r3
    moveq            r12, r3
    cmp              r0, #0
    bne              diff_negative_32_32_loop
    pop              {pc}

diff_positive_32_32
    usat             r0, #8, r0
    vdup.u8          q0, r0
    mov              r0, #4

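; Same traversal as diff_negative_32_32_loop above, but the DC value is
; added to the pixels with saturation instead of subtracted.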
diff_positive_32_32_loop
    sub              r0, #1
    LD_16x8          r1, r2
    ADD_DIFF_16x8    q0
    ST_16x8          r12, r2

    LD_16x8          r1, r2
    ADD_DIFF_16x8    q0
    ST_16x8          r12, r2
    cmp              r0, #2
    moveq            r1, r3
    moveq            r12, r3
    cmp              r0, #0
    bne              diff_positive_32_32_loop
    pop              {pc}

    ENDP  ; |vp9_idct32x32_1_add_neon|
    END