OLD | NEW |
| (Empty) |
1 ; | |
2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved. | |
3 ; | |
4 ; Use of this source code is governed by a BSD-style license | |
5 ; that can be found in the LICENSE file in the root of the source | |
6 ; tree. An additional intellectual property rights grant can be found | |
7 ; in the file PATENTS. All contributing project authors may | |
8 ; be found in the AUTHORS file in the root of the source tree. | |
9 ; | |
10 | |
11 | |
; Make the routine's symbol visible to the linker.
12 EXPORT |vp8_fast_quantize_b_armv6| | |
13 | |
; Pull in vp8_asm_enc_offsets.asm. The vp8_block_* / vp8_blockd_* field
; offsets used by the routine are not defined in this file, so presumably
; they come from this include - TODO confirm against the generated offsets.
14 INCLUDE vp8_asm_enc_offsets.asm | |
15 | |
; Assemble what follows as ARM-state instructions; REQUIRE8 / PRESERVE8
; declare that this file requires and preserves 8-byte stack alignment.
16 ARM | |
17 REQUIRE8 | |
18 PRESERVE8 | |
19 | |
; Read-only code section, aligned to 2^2 = 4 bytes.
20 AREA ||.text||, CODE, READONLY, ALIGN=2 | |
21 | |
; ---------------------------------------------------------------------------
; vp8_fast_quantize_b_armv6
;
; Quantizes one block of 16 coefficients (4 loop passes, 4 coefficients per
; pass), writes the quantized values to qcoeff and the dequantized values to
; dqcoeff, then stores the end-of-block count as a single byte through the
; pointer read from the BLOCKD structure at offset vp8_blockd_eob.
;
; Per coefficient z, with 16-bit lanes processed two at a time:
;     sz = -1 if z is negative, else 0          (ssat16 #1)
;     x  = (z ^ sz) - sz                        (absolute value)
;     y  = ((x + round) * quant_fast) asr 16
;     q  = (y ^ sz) - sz                        (sign put back) -> qcoeff
;     dq = low 16 bits of (q * dequant)                         -> dqcoeff
;
; In:     r0 = BLOCK *b, r1 = BLOCKD *d
; Out:    results are written to memory only; no return value is set up.
; Saved:  r1, r4-r11 are pushed on entry and restored on exit.
; Clobb:  r0, r2, r3, r12 and the condition flags.
; Stack:  10 words (40 bytes) pushed.
; NOTE(review): every array is read with word-sized ldr of packed halfword
; pairs, so the arrays are presumably word aligned - TODO confirm.
; ---------------------------------------------------------------------------
22 ; r0 BLOCK *b | |
23 ; r1 BLOCKD *d | |
24 |vp8_fast_quantize_b_armv6| PROC | |
; r1 is in the push list so the BLOCKD pointer can be reloaded from [sp, #0]
; in PART 2, after r1 has been reused as the flag accumulator.
25 stmfd sp!, {r1, r4-r11, lr} | |
26 | |
; Fetch the six array pointers from the two structures.
27 ldr r3, [r0, #vp8_block_coeff] ; coeff | |
28 ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast | |
29 ldr r5, [r0, #vp8_block_round] ; round | |
30 ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff | |
31 ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff | |
32 ldr r8, [r1, #vp8_blockd_dequant] ; dequant | |
33 | |
; r2 starts with only bit 24 set. Each pass shifts it left by 2, so the bit
; is shifted out on the fourth 'lsls' and the loop body runs four times.
34 ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction | |
35 ; is used to update the counter so that | |
36 ; it can be used to mark nonzero | |
37 ; quantized coefficient pairs. | |
38 | |
; From here on r1 no longer holds BLOCKD *d. Bit n of r1 is set when the
; packed pair (qcoeff[2n], qcoeff[2n+1]) is nonzero.
39 mov r1, #0 ; flags for quantized coeffs | |
40 | |
41 ; PART 1: quantization and dequantization loop | |
; Register roles inside the loop:
;   r3 coeff ptr    r4 quant_fast ptr   r5 round ptr
;   r6 qcoeff ptr   r7 dqcoeff ptr      r8 dequant ptr
;   r2 counter      r1 nonzero-pair flags
;   r0, r9-r12, lr  scratch
; The work for the two coefficient pairs of a pass is interleaved.
42 loop | |
43 ldr r9, [r3], #4 ; [z1 | z0] | |
44 ldr r10, [r5], #4 ; [r1 | r0] | |
45 ldr r11, [r4], #4 ; [q1 | q0] | |
46 | |
; ssat16 #1 clamps each halfword to the range [-1, 0]: -1 for a negative
; lane and 0 otherwise, giving a per-lane sign mask. The eor/ssub16 pair
; then produces the absolute value of each lane.
47 ssat16 lr, #1, r9 ; [sz1 | sz0] | |
48 eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0] | |
49 ssub16 r9, r9, lr ; x = (z ^ sz) - sz | |
50 sadd16 r9, r9, r10 ; [x1+r1 | x0+r0] | |
51 | |
52 ldr r12, [r3], #4 ; [z3 | z2] | |
53 | |
; Signed 16x16 multiplies giving 32-bit products: bottom lanes, then top lanes.
54 smulbb r0, r9, r11 ; [(x0+r0)*q0] | |
55 smultt r9, r9, r11 ; [(x1+r1)*q1] | |
56 | |
57 ldr r10, [r5], #4 ; [r3 | r2] | |
58 | |
59 ssat16 r11, #1, r12 ; [sz3 | sz2] | |
60 eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2] | |
; pkhtb keeps the top halfword of r9 and takes the bottom halfword from
; r0 asr 16, so both lanes end up holding the upper 16 bits of their product.
61 pkhtb r0, r9, r0, asr #16 ; [y1 | y0] | |
62 ldr r9, [r4], #4 ; [q3 | q2] | |
63 ssub16 r12, r12, r11 ; x = (z ^ sz) - sz | |
64 | |
65 sadd16 r12, r12, r10 ; [x3+r3 | x2+r2] | |
66 | |
; Put the original sign back on the first pair (lr still holds its mask).
67 eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)] | |
68 | |
69 smulbb r10, r12, r9 ; [(x2+r2)*q2] | |
70 smultt r12, r12, r9 ; [(x3+r3)*q3] | |
71 | |
72 ssub16 r0, r0, lr ; x = (y ^ sz) - sz | |
73 | |
; The compare covers the whole packed word, so the flag is set when either
; coefficient of the pair is nonzero. On pass k (0..3), r2 lsr 24 is the
; single bit 2k.
74 cmp r0, #0 ; check if zero | |
75 orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs | |
76 | |
77 str r0, [r6], #4 ; *qcoeff++ = x | |
78 ldr r9, [r8], #4 ; [dq1 | dq0] | |
79 | |
80 pkhtb r10, r12, r10, asr #16 ; [y3 | y2] | |
81 eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)] | |
82 ssub16 r10, r10, r11 ; x = (y ^ sz) - sz | |
83 | |
; Second pair of the pass: r2 lsr 23 is the single bit 2k + 1.
84 cmp r10, #0 ; check if zero | |
85 orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs | |
86 | |
87 str r10, [r6], #4 ; *qcoeff++ = x | |
88 ldr r11, [r8], #4 ; [dq3 | dq2] | |
89 | |
; Dequantize: quantized value times dequant factor, one lane per multiply.
90 smulbb r12, r0, r9 ; [x0*dq0] | |
91 smultt r0, r0, r9 ; [x1*dq1] | |
92 | |
93 smulbb r9, r10, r11 ; [x2*dq2] | |
94 smultt r10, r10, r11 ; [x3*dq3] | |
95 | |
; lsls sets the flags tested by the bne below; the strh and add in between
; do not modify flags. strh stores only the low 16 bits of each product.
96 lsls r2, r2, #2 ; update loop counter | |
97 strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0] | |
98 strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1] | |
99 strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2] | |
100 strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3] | |
101 add r7, r7, #8 ; dqcoeff += 8 | |
102 bne loop | |
103 | |
; The end-of-block count in lr is (scan position i of the last nonzero
; quantized coefficient) + 1, or 0 when every flag bit is clear.
104 ; PART 2: check position for eob... | |
; r1 was the lowest-numbered register pushed, so it sits at [sp, #0].
105 ldr r11, [sp, #0] ; restore BLOCKD pointer | |
106 mov lr, #0 ; init eob | |
107 cmp r1, #0 ; coeffs after quantization? | |
; Loads the destination pointer for the final strb; ldr leaves the flags
; from the cmp above intact for the beq.
108 ldr r12, [r11, #vp8_blockd_eob] | |
109 beq end ; skip eob calculations if all zero | |
110 | |
; r6 has advanced past the end of qcoeff, so reload the base pointer.
111 ldr r0, [r11, #vp8_blockd_qcoeff] | |
112 | |
; Each flag bit covers two adjacent coefficients in memory (rc = 2n, 2n+1).
; The bits are tested in descending order of the highest scan position i
; that their pair can occupy, according to the rc/i table below:
;   0x80 rc 14,15 -> i=15    0x20 rc 10,11 -> i=13    0x08 rc 6,7 -> i=12
;   0x40 rc 12,13 -> i=10    0x10 rc 8,9   -> i=8     0x02 rc 2,3 -> i=6
;   0x04 rc 4,5   -> i=4     otherwise (0x01) rc 0,1  -> i=1
; The first set bit selects where the backward scan starts.
113 ; check shortcut for nonzero qcoeffs | |
114 tst r1, #0x80 | |
115 bne quant_coeff_15_14 | |
116 tst r1, #0x20 | |
117 bne quant_coeff_13_11 | |
118 tst r1, #0x8 | |
119 bne quant_coeff_12_7 | |
120 tst r1, #0x40 | |
121 bne quant_coeff_10_9 | |
122 tst r1, #0x10 | |
123 bne quant_coeff_8_3 | |
124 tst r1, #0x2 | |
125 bne quant_coeff_6_5 | |
126 tst r1, #0x4 | |
127 bne quant_coeff_4_2 | |
128 b quant_coeff_1_0 | |
129 | |
; Backward scan. Each step loads qcoeff[rc] as a halfword (byte offset
; 2 * rc), sets lr = i + 1 and exits on the first nonzero value; otherwise it
; falls through to the next lower scan position. The rc/i pairs presumably
; follow the VP8 zig-zag scan order - TODO confirm against the C tables.
130 quant_coeff_15_14 | |
131 ldrh r2, [r0, #30] ; rc=15, i=15 | |
132 mov lr, #16 | |
133 cmp r2, #0 | |
134 bne end | |
135 | |
136 ldrh r3, [r0, #28] ; rc=14, i=14 | |
137 mov lr, #15 | |
138 cmp r3, #0 | |
139 bne end | |
140 | |
141 quant_coeff_13_11 | |
142 ldrh r2, [r0, #22] ; rc=11, i=13 | |
143 mov lr, #14 | |
144 cmp r2, #0 | |
145 bne end | |
146 | |
147 quant_coeff_12_7 | |
148 ldrh r3, [r0, #14] ; rc=7, i=12 | |
149 mov lr, #13 | |
150 cmp r3, #0 | |
151 bne end | |
152 | |
153 ldrh r2, [r0, #20] ; rc=10, i=11 | |
154 mov lr, #12 | |
155 cmp r2, #0 | |
156 bne end | |
157 | |
158 quant_coeff_10_9 | |
159 ldrh r3, [r0, #26] ; rc=13, i=10 | |
160 mov lr, #11 | |
161 cmp r3, #0 | |
162 bne end | |
163 | |
164 ldrh r2, [r0, #24] ; rc=12, i=9 | |
165 mov lr, #10 | |
166 cmp r2, #0 | |
167 bne end | |
168 | |
169 quant_coeff_8_3 | |
170 ldrh r3, [r0, #18] ; rc=9, i=8 | |
171 mov lr, #9 | |
172 cmp r3, #0 | |
173 bne end | |
174 | |
175 ldrh r2, [r0, #12] ; rc=6, i=7 | |
176 mov lr, #8 | |
177 cmp r2, #0 | |
178 bne end | |
179 | |
180 quant_coeff_6_5 | |
181 ldrh r3, [r0, #6] ; rc=3, i=6 | |
182 mov lr, #7 | |
183 cmp r3, #0 | |
184 bne end | |
185 | |
186 ldrh r2, [r0, #4] ; rc=2, i=5 | |
187 mov lr, #6 | |
188 cmp r2, #0 | |
189 bne end | |
190 | |
191 quant_coeff_4_2 | |
192 ldrh r3, [r0, #10] ; rc=5, i=4 | |
193 mov lr, #5 | |
194 cmp r3, #0 | |
195 bne end | |
196 | |
197 ldrh r2, [r0, #16] ; rc=8, i=3 | |
198 mov lr, #4 | |
199 cmp r2, #0 | |
200 bne end | |
201 | |
202 ldrh r3, [r0, #8] ; rc=4, i=2 | |
203 mov lr, #3 | |
204 cmp r3, #0 | |
205 bne end | |
206 | |
207 quant_coeff_1_0 | |
208 ldrh r2, [r0, #2] ; rc=1, i=1 | |
209 mov lr, #2 | |
210 cmp r2, #0 | |
211 bne end | |
212 | |
; qcoeff[0] is not loaded: this point is reached only when some flag bit was
; set and every other coefficient checked on the way was zero.
213 mov lr, #1 ; rc=0, i=0 | |
214 | |
; Store the count as one byte through the pointer held in r12, then restore
; the saved registers and return by popping the saved lr into pc.
215 end | |
216 strb lr, [r12] | |
217 ldmfd sp!, {r1, r4-r11, pc} | |
218 | |
219 ENDP | |
220 | |
; Initial value of the PART 1 loop counter, read with 'ldr r2, loop_count'.
; Only bit 24 is set: shifting left by 2 per pass clears the register after
; four passes, and the counter shifted right by 24 / 23 yields the flag bit
; for the first / second coefficient pair of the current pass.
221 loop_count | |
222 DCD 0x1000000 | |
223 | |
224 END | |
225 | |
OLD | NEW |