source/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm - Issue 756673003: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.asm

Issue 756673003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 6 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 ;

2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.

3 ;

4 ; Use of this source code is governed by a BSD-style license

5 ; that can be found in the LICENSE file in the root of the source

6 ; tree. An additional intellectual property rights grant can be found

7 ; in the file PATENTS. All contributing project authors may

8 ; be found in the AUTHORS file in the root of the source tree.

9 ;

10

11

12 EXPORT \|vp8_fast_quantize_b_neon\| ; export the single-block entry point

13 EXPORT \|vp8_fast_quantize_b_pair_neon\| ; export the two-block entry point

14

15 INCLUDE vp8_asm_enc_offsets.asm ; presumably supplies the vp8_block_* / vp8_blockd_* field offsets used below -- TODO confirm

16

17 ARM ; assemble the following code as ARM (not Thumb) instructions

18 REQUIRE8 ; declare that this code requires 8-byte stack alignment

19 PRESERVE8 ; declare that this code preserves 8-byte stack alignment

20

21 AREA \|\|.text\|\|, CODE, READONLY, ALIGN=4 ; read-only code section, aligned to 2^4 = 16 bytes

22

23 ;vp8_fast_quantize_b_pair_neon(BLOCK b1, BLOCK b2, BLOCKD d1, BLOCKD d2); quantizes two 16-coefficient blocks in one pass; r0-r3 (b1, b2, d1, d2) are used as struct base pointers; round/quant tables are read from b1 only and the dequant table from d1 only

24 \|vp8_fast_quantize_b_pair_neon\| PROC

25

26 stmfd sp!, {r4-r9} ; save the core registers this routine uses as scratch

27 vstmdb sp!, {q4-q7} ; save q4-q7, which are overwritten below

28

29 ldr r4, [r0, #vp8_block_coeff] ; r4 = b1 coeff pointer

30 ldr r5, [r0, #vp8_block_quant_fast] ; r5 = b1 quant_fast pointer

31 ldr r6, [r0, #vp8_block_round] ; r6 = b1 round pointer

32

33 vld1.16 {q0, q1}, [r4@128] ; load z (16 x s16, 128-bit aligned address)

34

35 ldr r7, [r2, #vp8_blockd_qcoeff] ; r7 = d1 qcoeff pointer

36

37 vabs.s16 q4, q0 ; calculate x = abs(z)

38 vabs.s16 q5, q1

39

40 ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative

41 vshr.s16 q2, q0, #15 ; sz

42 vshr.s16 q3, q1, #15

43

44 vld1.s16 {q6, q7}, [r6@128] ; load round_ptr [0-15]

45 vld1.s16 {q8, q9}, [r5@128] ; load quant_ptr [0-15]

46

47 ldr r4, [r1, #vp8_block_coeff] ; r4 = b2 coeff pointer (b1 coeff already loaded)

48

49 vadd.s16 q4, q6 ; x + Round

50 vadd.s16 q5, q7

51

52 vld1.16 {q0, q1}, [r4@128] ; load z2 (overwrites z; x and sz are already derived)

53

54 vqdmulh.s16 q4, q8 ; vqdmulh yields (2 * (Round+abs(z)) * Quant) >> 16; the later >> 1 gives y = ((Round+abs(z)) * Quant) >> 16

55 vqdmulh.s16 q5, q9

56

57 vabs.s16 q10, q0 ; calculate x2 = abs(z_2)

58 vabs.s16 q11, q1

59 vshr.s16 q12, q0, #15 ; sz2

60 vshr.s16 q13, q1, #15

61

62 ;modify data to have its original sign

63 veor.s16 q4, q2 ; y^sz (applied before the >> 1 below)

64 veor.s16 q5, q3

65

66 vadd.s16 q10, q6 ; x2 + Round (same round table as block 1)

67 vadd.s16 q11, q7

68

69 ldr r8, [r2, #vp8_blockd_dequant] ; r8 = d1 dequant pointer (used for both blocks)

70

71 vqdmulh.s16 q10, q8 ; doubled product for block 2; after the later >> 1, y2 = ((Round+abs(z2)) * Quant) >> 16

72 vqdmulh.s16 q11, q9

73

74 vshr.s16 q4, #1 ; right shift 1 after vqdmulh (undoes its doubling)

75 vshr.s16 q5, #1

76

77 vld1.s16 {q6, q7}, [r8@128] ;load dequant_ptr[i] (overwrites round values, no longer needed)

78

79 vsub.s16 q4, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) when z is negative (2's complement negate)

80 vsub.s16 q5, q3

81

82 vshr.s16 q10, #1 ; right shift 1 after vqdmulh (undoes its doubling)

83 vshr.s16 q11, #1

84

85 ldr r9, [r2, #vp8_blockd_dqcoeff] ; r9 = d1 dqcoeff pointer

86

87 veor.s16 q10, q12 ; y2^sz2

88 veor.s16 q11, q13

89

90 vst1.s16 {q4, q5}, [r7] ; store: qcoeff = x1

91

92

93 vsub.s16 q10, q12 ; x2=(y2^sz2)-sz2 = (y2^sz2)-(-1) when z2 is negative (2's complement negate)

94 vsub.s16 q11, q13

95

96 ldr r6, [r3, #vp8_blockd_qcoeff] ; r6 = d2 qcoeff pointer (round pointer no longer needed)

97

98 vmul.s16 q2, q6, q4 ; x * Dequant (q2/q3 reused; sz is no longer needed)

99 vmul.s16 q3, q7, q5

100

101 adr r0, inv_zig_zag ; load ptr of inverse zigzag table (r0 reused; b1 no longer needed)

102

103 vceq.s16 q8, q8 ; set q8 to all 1 (quant values no longer needed)

104

105 vst1.s16 {q10, q11}, [r6] ; store: qcoeff = x2

106

107 vmul.s16 q12, q6, q10 ; x2 * Dequant (q12/q13 reused; sz2 is no longer needed)

108 vmul.s16 q13, q7, q11

109

110 vld1.16 {q6, q7}, [r0@128] ; load inverse scan order (overwrites dequant values)

111

112 vtst.16 q14, q4, q8 ; now find eob for block 1

113 vtst.16 q15, q5, q8 ; non-zero element is set to all 1

114

115 vst1.s16 {q2, q3}, [r9] ; store dqcoeff = x * Dequant

116

117 ldr r7, [r3, #vp8_blockd_dqcoeff] ; r7 = d2 dqcoeff pointer

118

119 vand q0, q6, q14 ; get all valid numbers from scan array

120 vand q1, q7, q15

121

122 vst1.s16 {q12, q13}, [r7] ; store dqcoeff = x2 * Dequant

123

124 vtst.16 q2, q10, q8 ; now find eob for block 2

125 vtst.16 q3, q11, q8 ; non-zero element is set to all 1

126

127 vmax.u16 q0, q0, q1 ; find maximum value in q0, q1

128

129 vand q10, q6, q2 ; get all valid numbers from scan array

130 vand q11, q7, q3

131 vmax.u16 q10, q10, q11 ; find maximum value in q10, q11

132

133 vmax.u16 d0, d0, d1 ; block 1: reduce 8 candidates to 4

134 vmax.u16 d20, d20, d21 ; block 2: reduce 8 candidates to 4

135 vmovl.u16 q0, d0 ; block 1: widen 4 x u16 to 4 x u32

136 vmovl.u16 q10, d20 ; block 2: widen 4 x u16 to 4 x u32

137

138 vmax.u32 d0, d0, d1 ; block 1: reduce 4 candidates to 2

139 vmax.u32 d20, d20, d21 ; block 2: reduce 4 candidates to 2

140 vpmax.u32 d0, d0, d0 ; block 1: pairwise max leaves the overall maximum

141 vpmax.u32 d20, d20, d20 ; block 2: pairwise max leaves the overall maximum

142

143 ldr r4, [r2, #vp8_blockd_eob] ; r4 = d1 eob pointer

144 ldr r5, [r3, #vp8_blockd_eob] ; r5 = d2 eob pointer

145

146 vst1.8 {d0[0]}, [r4] ; store eob for block 1 (one byte)

147 vst1.8 {d20[0]}, [r5] ; store eob for block 2 (one byte)

148

149 vldmia sp!, {q4-q7} ; restore q4-q7

150 ldmfd sp!, {r4-r9} ; restore core registers

151 bx lr ; return to caller

152

153 ENDP

154

155 ;void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d); r0 = b, r1 = d, both used as struct base pointers (this comment previously named vp8_fast_quantize_b_c, presumably the C reference this mirrors)

156 \|vp8_fast_quantize_b_neon\| PROC

157

158 stmfd sp!, {r4-r7} ; save the core registers used as scratch (only q0-q3 and q8-q15 are used, so no NEON registers are saved)

159

160 ldr r3, [r0, #vp8_block_coeff] ; r3 = b coeff pointer

161 ldr r4, [r0, #vp8_block_quant_fast] ; r4 = b quant_fast pointer

162 ldr r5, [r0, #vp8_block_round] ; r5 = b round pointer

163

164 vld1.16 {q0, q1}, [r3@128] ; load z

165 vorr.s16 q14, q0, q1 ; check if all zero (step 1)

166 ldr r6, [r1, #vp8_blockd_qcoeff] ; r6 = d qcoeff pointer

167 ldr r7, [r1, #vp8_blockd_dqcoeff] ; r7 = d dqcoeff pointer

168 vorr.s16 d28, d28, d29 ; check if all zero (step 2)

169

170 vabs.s16 q12, q0 ; calculate x = abs(z)

171 vabs.s16 q13, q1

172

173 ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative

174 vshr.s16 q2, q0, #15 ; sz

175 vmov r2, r3, d28 ; check if all zero (step 3): copy the OR-ed 64 bits into r2/r3 (coeff pointer in r3 no longer needed)

176 vshr.s16 q3, q1, #15

177

178 vld1.s16 {q14, q15}, [r5@128]; load round_ptr [0-15]

179 vld1.s16 {q8, q9}, [r4@128] ; load quant_ptr [0-15]

180

181 vadd.s16 q12, q14 ; x + Round

182 vadd.s16 q13, q15

183

184 adr r0, inv_zig_zag ; load ptr of inverse zigzag table (r0 reused; b no longer needed)

185

186 vqdmulh.s16 q12, q8 ; vqdmulh yields (2 * (Round+abs(z)) * Quant) >> 16; the later >> 1 gives y = ((Round+abs(z)) * Quant) >> 16

187 vqdmulh.s16 q13, q9

188

189 vld1.16 {q10, q11}, [r0@128]; load inverse scan order

190

191 vceq.s16 q8, q8 ; set q8 to all 1 (quant values no longer needed)

192

193 ldr r4, [r1, #vp8_blockd_dequant] ; r4 = d dequant pointer

194

195 vshr.s16 q12, #1 ; right shift 1 after vqdmulh (undoes its doubling)

196 vshr.s16 q13, #1

197

198 ldr r5, [r1, #vp8_blockd_eob] ; r5 = d eob pointer

199

200 orr r2, r2, r3 ; check if all zero (step 4)

201 cmp r2, #0 ; check if all zero (step 5)

202 beq zero_output ; check if all zero (step 6): every input coefficient is 0, take the short path

203

204 ;modify data to have its original sign

205 veor.s16 q12, q2 ; y^sz

206 veor.s16 q13, q3

207

208 vsub.s16 q12, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) when z is negative (2's complement negate)

209 vsub.s16 q13, q3

210

211 vld1.s16 {q2, q3}, [r4@128] ; load dequant_ptr[i] (q2/q3 reused; sz is no longer needed)

212

213 vtst.16 q14, q12, q8 ; now find eob

214 vtst.16 q15, q13, q8 ; non-zero element is set to all 1

215

216 vst1.s16 {q12, q13}, [r6@128]; store: qcoeff = x1

217

218 vand q10, q10, q14 ; get all valid numbers from scan array

219 vand q11, q11, q15

220

221

222 vmax.u16 q0, q10, q11 ; find maximum value in q10, q11

223 vmax.u16 d0, d0, d1 ; reduce 8 candidates to 4

224 vmovl.u16 q0, d0 ; widen 4 x u16 to 4 x u32

225

226 vmul.s16 q2, q12 ; x * Dequant

227 vmul.s16 q3, q13

228

229 vmax.u32 d0, d0, d1 ; reduce 4 candidates to 2

230 vpmax.u32 d0, d0, d0 ; pairwise max leaves the overall maximum

231

232 vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant

233

234 vst1.8 {d0[0]}, [r5] ; store eob (one byte)

235

236 ldmfd sp!, {r4-r7} ; restore core registers

237 bx lr ; return to caller

238

239 zero_output

240 strb r2, [r5] ; store eob (r2 is 0 on this path)

241 vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0 (q0/q1 still hold z, which is all zero here)

242 vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0

243

244 ldmfd sp!, {r4-r7} ; restore core registers

245 bx lr ; return to caller

246

247 ENDP

248

249 ; default inverse zigzag table is defined in vp8/common/entropy.c; entries here are 1-based (0x0001-0x0010), so the maximum entry over the non-zero coefficients is stored directly as eob by both routines above

250 ALIGN 16 ; enable use of @128 bit aligned loads

251 inv_zig_zag

252 DCW 0x0001, 0x0002, 0x0006, 0x0007 ; entries for coefficients 0-3

253 DCW 0x0003, 0x0005, 0x0008, 0x000d ; entries for coefficients 4-7

254 DCW 0x0004, 0x0009, 0x000c, 0x000e ; entries for coefficients 8-11

255 DCW 0x000a, 0x000b, 0x000f, 0x0010 ; entries for coefficients 12-15

256

257 END ; end of the assembly source

258

OLD	NEW

« no previous file with comments | « source/libvpx/vp8/encoder/arm/boolhuff_arm.c ('k') | source/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c » ('j') | no next file with comments »