OLD | NEW |
| (Empty) |
1 ; | |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
3 ; | |
4 ; Use of this source code is governed by a BSD-style license | |
5 ; that can be found in the LICENSE file in the root of the source | |
6 ; tree. An additional intellectual property rights grant can be found | |
7 ; in the file PATENTS. All contributing project authors may | |
8 ; be found in the AUTHORS file in the root of the source tree. | |
9 ; | |
10 | |
11 | |
12 EXPORT |vp8cx_pack_mb_row_tokens_armv5| | |
13 IMPORT |vp8_validate_buffer_arm| | |
14 | |
15 INCLUDE vp8_asm_enc_offsets.asm | |
16 | |
17 ARM | |
18 REQUIRE8 | |
19 PRESERVE8 | |
20 | |
21 AREA |.text|, CODE, READONLY | |
22 | |
23 | |
; VALIDATE_POS $start, $pos
;   Calls vp8_validate_buffer_arm with
;     r0 = $start, r1 = $pos,
;     r2 = word loaded from [r0, #vp8_writer_buffer_end],
;     r3 = word loaded from [r0, #vp8_writer_error],
;   where r0 holds the vp8_writer when the macro is entered.
;   r0-r3, r12 and lr are saved before the call and restored after it, so
;   the caller sees them unchanged. The condition flags are not saved.
;   Operand constraints (they follow from the instruction order below):
;     - r2 and r3 are loaded before the operands are read, so neither
;       $start nor $pos may be r2 or r3;
;     - r1 is overwritten with $pos before $start is copied into r0, so
;       $start may not be r1.
;   Six registers (24 bytes) are pushed, so an 8-byte aligned sp stays
;   8-byte aligned at the call.
24 ; macro for validating write buffer position | |
25 ; needs vp8_writer in r0 | |
26 ; start shall not be in r1 | |
27 MACRO | |
28 VALIDATE_POS $start, $pos | |
29 push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call | |
30 ldr r2, [r0, #vp8_writer_buffer_end] | |
31 ldr r3, [r0, #vp8_writer_error] | |
32 mov r1, $pos | |
33 mov r0, $start | |
34 bl vp8_validate_buffer_arm | |
35 pop {r0-r3, r12, lr} | |
36 MEND | |
37 | |
; vp8cx_pack_mb_row_tokens_armv5
;
; Walks cpi->common.mb_rows consecutive entries of the token list that
; cpi->tp_list points to. For every token p in [start, stop) of an entry
; it writes, through the vp8_writer:
;   1. the token's tree bits (token_loop),
;   2. if the token's extra-bit entry has a non-zero base_val and a
;      non-zero len, the bits of (p->Extra >> 1) (extra_bits_loop),
;   3. if base_val is non-zero, bit 0 of p->Extra using an even split
;      ((range + 1) >> 1).
; lowvalue, range and count are read from the writer once on entry and
; stored back once on exit; pos and the buffer are updated as bytes are
; emitted.
;
; Arguments:
38 ; r0 VP8_COMP *cpi | |
39 ; r1 vp8_writer *w | |
40 ; r2 vp8_coef_encodings | |
41 ; r3 vp8_extra_bits | |
42 ; s0 vp8_coef_tree | |
;      (s0 is the first stack argument; after the prologue it is read
;       at [sp, #64] = 40 bytes of pushed registers + 24 bytes of locals)
;
; Local stack frame (24 bytes):
;   [sp, #0]   stop pointer of the current token list entry
;   [sp, #4]   b->tree, saved while r10 is reused as a temporary
;   [sp, #8]   vp8_extra_bits
;   [sp, #12]  mb_rows still to process
;   [sp, #16]  address of the current token list entry
;   [sp, #20]  vp8_coef_encodings
;
; Register roles inside the loops:
;   r0 = w   r1 = p   r2 = lowvalue   r3 = count   r5 = range
;   r8 = n (bits left)   r9 = pp   r10 = tree   r12 = v, left-aligned
;   lr = i (tree index)   r4, r6, r7, r11 = temporaries
;
; NOTE(review): the row loop is tested at the bottom, so mb_rows is
; assumed to be at least 1 -- confirm against the caller.
43 | |
44 |vp8cx_pack_mb_row_tokens_armv5| PROC | |
45 push {r4-r12, lr} | |
46 sub sp, sp, #24 | |
47 | |
48 ; Compute address of cpi->common.mb_rows | |
49 ldr r4, _VP8_COMP_common_ | |
50 ldr r6, _VP8_COMMON_MBrows_ | |
51 add r4, r0, r4 | |
52 | |
53 ldr r5, [r4, r6] ; load up mb_rows | |
54 | |
55 str r2, [sp, #20] ; save vp8_coef_encodings | |
56 str r5, [sp, #12] ; save mb_rows | |
57 str r3, [sp, #8] ; save vp8_extra_bits | |
58 | |
59 ldr r4, _VP8_COMP_tplist_ | |
60 add r4, r0, r4 | |
61 ldr r7, [r4, #0] ; dereference cpi->tp_list | |
62 | |
63 mov r0, r1 ; keep same as other loops | |
64 | |
; Cache the writer state in registers for the whole routine.
65 ldr r2, [r0, #vp8_writer_lowvalue] | |
66 ldr r5, [r0, #vp8_writer_range] | |
67 ldr r3, [r0, #vp8_writer_count] | |
68 | |
; r7 = address of the token list entry to process in this iteration.
69 mb_row_loop | |
70 | |
71 ldr r1, [r7, #tokenlist_start] | |
72 ldr r9, [r7, #tokenlist_stop] | |
73 str r9, [sp, #0] ; save stop for later comparison | |
74 str r7, [sp, #16] ; tokenlist address for next time | |
75 | |
76 b check_p_lt_stop | |
77 | |
78 ; actual work gets done here! | |
79 | |
80 while_p_lt_stop | |
81 ldrb r6, [r1, #tokenextra_token] ; t | |
82 ldr r4, [sp, #20] ; vp8_coef_encodings | |
83 mov lr, #0 | |
84 add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t | |
85 ldr r9, [r1, #tokenextra_context_tree] ; pp | |
86 | |
87 ldrb r7, [r1, #tokenextra_skip_eob_node] | |
88 | |
89 ldr r6, [r4, #vp8_token_value] ; v | |
90 ldr r8, [r4, #vp8_token_len] ; n | |
91 | |
92 ; vp8 specific skip_eob_node | |
93 cmp r7, #0 | |
94 movne lr, #2 ; i = 2 | |
95 subne r8, r8, #1 ; --n | |
96 | |
97 rsb r4, r8, #32 ; 32-n | |
98 ldr r10, [sp, #64] ; vp8_coef_tree | |
99 | |
100 ; v is kept in r12 during the token pack loop | |
101 lsl r12, r6, r4 ; r12 = v << 32 - n | |
102 | |
103 ; loop start | |
104 token_loop | |
105 ldrb r4, [r9, lr, asr #1] ; pp [i>>1] | |
106 sub r7, r5, #1 ; range-1 | |
107 | |
108 ; Decisions are made based on the bit value shifted | |
109 ; off of v, so set a flag here based on this. | |
110 ; This value is referred to as "bb" | |
111 lsls r12, r12, #1 ; bb = v >> n | |
112 mul r6, r4, r7 ; ((range-1) * pp[i>>1])) | |
113 | |
114 ; bb can only be 0 or 1. So only execute this statement | |
115 ; if bb == 1, otherwise it will act like i + 0 | |
116 addcs lr, lr, #1 ; i + bb | |
117 | |
118 mov r7, #1 | |
119 ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] | |
120 add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) | |
121 | |
; The carry set by the lsls above is still live here: none of the
; instructions in between update the flags.
122 addcs r2, r2, r4 ; if (bb) lowvalue += split | |
123 subcs r4, r5, r4 ; if (bb) range = range-split | |
124 | |
125 ; Counting the leading zeros is used to normalize range. | |
126 clz r6, r4 | |
127 sub r6, r6, #24 ; shift | |
128 | |
129 ; Flag is set on the sum of count. This flag is used later | |
130 ; to determine if count >= 0 | |
131 adds r3, r3, r6 ; count += shift | |
132 lsl r5, r4, r6 ; range <<= shift | |
133 bmi token_count_lt_zero ; if(count >= 0) | |
134 | |
135 sub r6, r6, r3 ; offset = shift - count | |
136 sub r4, r6, #1 ; offset-1 | |
137 lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000) | |
138 bpl token_high_bit_not_set | |
139 | |
; Carry propagation: step back from pos-1 over bytes equal to 0xff,
; zeroing each one, then add 1 to the first byte that is not 0xff.
; NOTE(review): the ldrb at token_zero_while_start is unconditional, so
; when x < 0 it still executes with whatever r7 holds at that point.
140 ldr r4, [r0, #vp8_writer_pos] ; x | |
141 sub r4, r4, #1 ; x = w->pos-1 | |
142 b token_zero_while_start | |
143 token_zero_while_loop | |
144 mov r10, #0 | |
145 strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 | |
146 sub r4, r4, #1 ; x-- | |
147 token_zero_while_start | |
148 cmp r4, #0 | |
149 ldrge r7, [r0, #vp8_writer_buffer] | |
150 ldrb r11, [r7, r4] | |
151 cmpge r11, #0xff | |
152 beq token_zero_while_loop | |
153 | |
154 ldr r7, [r0, #vp8_writer_buffer] | |
155 ldrb r10, [r7, r4] ; w->buffer[x] | |
156 add r10, r10, #1 | |
157 strb r10, [r7, r4] ; w->buffer[x] + 1 | |
158 token_high_bit_not_set | |
159 rsb r4, r6, #24 ; 24-offset | |
160 ldr r10, [r0, #vp8_writer_buffer] | |
161 lsr r7, r2, r4 ; lowvalue >> (24-offset) | |
162 ldr r4, [r0, #vp8_writer_pos] ; w->pos | |
163 lsl r2, r2, r6 ; lowvalue <<= offset | |
164 mov r6, r3 ; shift = count | |
165 add r11, r4, #1 ; w->pos++ | |
166 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff | |
167 str r11, [r0, #vp8_writer_pos] | |
168 sub r3, r3, #8 ; count -= 8 | |
169 | |
170 VALIDATE_POS r10, r11 ; validate_buffer at pos | |
171 | |
172 strb r7, [r10, r4] ; w->buffer[w->pos++] | |
173 | |
174 ; r10 is used earlier in the loop, but r10 is used as | |
175 ; temp variable here. So after r10 is used, reload | |
176 ; vp8_coef_tree into r10 | |
177 ldr r10, [sp, #64] ; vp8_coef_tree | |
178 | |
179 token_count_lt_zero | |
180 lsl r2, r2, r6 ; lowvalue <<= shift | |
181 | |
182 subs r8, r8, #1 ; --n | |
183 bne token_loop | |
184 | |
185 ldrb r6, [r1, #tokenextra_token] ; t | |
186 ldr r7, [sp, #8] ; vp8_extra_bits | |
187 ; Add t * sizeof (vp8_extra_bit_struct) to get the desired | |
188 ; element. Here vp8_extra_bit_struct == 16 | |
189 add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t | |
190 | |
191 ldr r4, [r12, #vp8_extra_bit_struct_base_val] | |
192 cmp r4, #0 | |
193 beq skip_extra_bits | |
194 | |
195 ; if( b->base_val) | |
196 ldr r8, [r12, #vp8_extra_bit_struct_len] ; L | |
197 ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra | |
198 cmp r8, #0 ; if( L) | |
199 beq no_extra_bits | |
200 | |
201 ldr r9, [r12, #vp8_extra_bit_struct_prob] | |
202 asr r7, lr, #1 ; v=e>>1 | |
203 | |
204 ldr r10, [r12, #vp8_extra_bit_struct_tree] | |
205 str r10, [sp, #4] ; b->tree | |
206 | |
207 rsb r4, r8, #32 | |
208 lsl r12, r7, r4 | |
209 | |
210 mov lr, #0 ; i = 0 | |
211 | |
; Same bit-writing sequence as token_loop, with pp = b->prob and
; tree = b->tree.
212 extra_bits_loop | |
213 ldrb r4, [r9, lr, asr #1] ; pp[i>>1] | |
214 sub r7, r5, #1 ; range-1 | |
215 lsls r12, r12, #1 ; v >> n | |
216 mul r6, r4, r7 ; (range-1) * pp[i>>1] | |
217 addcs lr, lr, #1 ; i + bb | |
218 | |
219 mov r7, #1 | |
220 ldrsb lr, [r10, lr] ; i = b->tree[i+bb] | |
221 add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) | |
222 | |
223 addcs r2, r2, r4 ; if (bb) lowvalue += split | |
224 subcs r4, r5, r4 ; if (bb) range = range-split | |
225 | |
226 clz r6, r4 | |
227 sub r6, r6, #24 | |
228 | |
229 adds r3, r3, r6 ; count += shift | |
230 lsl r5, r4, r6 ; range <<= shift | |
231 bmi extra_count_lt_zero ; if(count >= 0) | |
232 | |
233 sub r6, r6, r3 ; offset= shift - count | |
234 sub r4, r6, #1 ; offset-1 | |
235 lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000) | |
236 bpl extra_high_bit_not_set | |
237 | |
238 ldr r4, [r0, #vp8_writer_pos] ; x | |
239 sub r4, r4, #1 ; x = w->pos - 1 | |
240 b extra_zero_while_start | |
241 extra_zero_while_loop | |
242 mov r10, #0 | |
243 strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 | |
244 sub r4, r4, #1 ; x-- | |
245 extra_zero_while_start | |
246 cmp r4, #0 | |
247 ldrge r7, [r0, #vp8_writer_buffer] | |
248 ldrb r11, [r7, r4] | |
249 cmpge r11, #0xff | |
250 beq extra_zero_while_loop | |
251 | |
252 ldr r7, [r0, #vp8_writer_buffer] | |
253 ldrb r10, [r7, r4] | |
254 add r10, r10, #1 | |
255 strb r10, [r7, r4] | |
256 extra_high_bit_not_set | |
257 rsb r4, r6, #24 ; 24-offset | |
258 ldr r10, [r0, #vp8_writer_buffer] | |
259 lsr r7, r2, r4 ; lowvalue >> (24-offset) | |
260 ldr r4, [r0, #vp8_writer_pos] | |
261 lsl r2, r2, r6 ; lowvalue <<= offset | |
262 mov r6, r3 ; shift = count | |
263 add r11, r4, #1 ; w->pos++ | |
264 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff | |
265 str r11, [r0, #vp8_writer_pos] | |
266 sub r3, r3, #8 ; count -= 8 | |
267 | |
268 VALIDATE_POS r10, r11 ; validate_buffer at pos | |
269 | |
270 strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) | |
271 ldr r10, [sp, #4] ; b->tree | |
272 extra_count_lt_zero | |
273 lsl r2, r2, r6 | |
274 | |
275 subs r8, r8, #1 ; --n | |
276 bne extra_bits_loop ; while (n) | |
277 | |
; lr was reused as the tree index by extra_bits_loop, so p->Extra is
; reloaded here. Only bit 0 is tested below. The load uses the same
; halfword access and named offset as the first read of p->Extra, rather
; than a word load from a hard-coded offset, so the tested bit does not
; depend on byte order or on the field staying at offset 4.
278 no_extra_bits | |
279 ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra | |
280 add r4, r5, #1 ; range + 1 | |
281 tst lr, #1 | |
282 lsr r4, r4, #1 ; split = (range + 1) >> 1 | |
283 addne r2, r2, r4 ; lowvalue += split | |
284 subne r4, r5, r4 ; range = range-split | |
285 tst r2, #0x80000000 ; lowvalue & 0x80000000 | |
286 lsl r5, r4, #1 ; range <<= 1 | |
287 beq end_high_bit_not_set | |
288 | |
; Carry propagation, same scheme as in the two loops above.
289 ldr r4, [r0, #vp8_writer_pos] | |
290 mov r7, #0 | |
291 sub r4, r4, #1 | |
292 b end_zero_while_start | |
293 end_zero_while_loop | |
294 strb r7, [r6, r4] | |
295 sub r4, r4, #1 ; x-- | |
296 end_zero_while_start | |
297 cmp r4, #0 | |
298 ldrge r6, [r0, #vp8_writer_buffer] | |
299 ldrb r12, [r6, r4] | |
300 cmpge r12, #0xff | |
301 beq end_zero_while_loop | |
302 | |
303 ldr r6, [r0, #vp8_writer_buffer] | |
304 ldrb r7, [r6, r4] | |
305 add r7, r7, #1 | |
306 strb r7, [r6, r4] | |
307 end_high_bit_not_set | |
308 adds r3, r3, #1 ; ++count | |
309 lsl r2, r2, #1 ; lowvalue <<= 1 | |
; The lsl above does not update the flags, so bne still tests the
; result of ++count: a byte is emitted only when count reached 0.
310 bne end_count_zero | |
311 | |
312 ldr r4, [r0, #vp8_writer_pos] | |
313 mvn r3, #7 ; count = -8 | |
314 ldr r7, [r0, #vp8_writer_buffer] | |
315 lsr r6, r2, #24 ; lowvalue >> 24 | |
316 add r12, r4, #1 ; w->pos++ | |
317 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff | |
318 str r12, [r0, #vp8_writer_pos] | |
319 | |
320 VALIDATE_POS r7, r12 ; validate_buffer at pos | |
321 | |
322 strb r6, [r7, r4] | |
323 end_count_zero | |
324 skip_extra_bits | |
325 add r1, r1, #TOKENEXTRA_SZ ; ++p | |
326 check_p_lt_stop | |
327 ldr r4, [sp, #0] ; stop | |
328 cmp r1, r4 ; while( p < stop) | |
329 bcc while_p_lt_stop | |
330 | |
; Row finished: decrement the remaining-row counter and advance r7 to
; the next token list entry for mb_row_loop.
331 ldr r6, [sp, #12] ; mb_rows | |
332 ldr r7, [sp, #16] ; tokenlist address | |
333 subs r6, r6, #1 | |
334 add r7, r7, #TOKENLIST_SZ ; next element in the array | |
335 str r6, [sp, #12] | |
336 bne mb_row_loop | |
337 | |
; Write the cached writer state back and return.
338 str r2, [r0, #vp8_writer_lowvalue] | |
339 str r5, [r0, #vp8_writer_range] | |
340 str r3, [r0, #vp8_writer_count] | |
341 add sp, sp, #24 | |
342 pop {r4-r12, pc} | |
343 ENDP | |
344 | |
; Literal words holding structure offsets. The procedure above loads them
; with "ldr rX, <label>" and adds them to a base pointer:
;   _VP8_COMP_common_    added to cpi
;   _VP8_COMMON_MBrows_  used as the index from that address to mb_rows
;   _VP8_COMP_tplist_    added to cpi to reach the tp_list pointer
; NOTE(review): the three symbols are presumably defined by the INCLUDEd
; vp8_asm_enc_offsets.asm -- confirm.
345 _VP8_COMP_common_ | |
346 DCD vp8_comp_common | |
347 _VP8_COMMON_MBrows_ | |
348 DCD vp8_common_mb_rows | |
349 _VP8_COMP_tplist_ | |
350 DCD vp8_comp_tplist | |
351 | |
352 END | |
OLD | NEW |