OLD | NEW |
| (Empty) |
1 ; | |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
3 ; | |
4 ; Use of this source code is governed by a BSD-style license | |
5 ; that can be found in the LICENSE file in the root of the source | |
6 ; tree. An additional intellectual property rights grant can be found | |
7 ; in the file PATENTS. All contributing project authors may | |
8 ; be found in the AUTHORS file in the root of the source tree. | |
9 ; | |
10 | |
11 | |
12 EXPORT |vp8cx_pack_tokens_into_partitions_armv5| | |
13 IMPORT |vp8_validate_buffer_arm| | |
14 | |
15 INCLUDE vp8_asm_enc_offsets.asm | |
16 | |
17 ARM | |
18 REQUIRE8 | |
19 PRESERVE8 | |
20 | |
21 AREA |.text|, CODE, READONLY | |
22 | |
23 ; macro for validating write buffer position | |
24 ; needs vp8_writer in r0 | |
25 ; start shall not be in r1 | |
;
; Calls vp8_validate_buffer_arm with:
;   r0 = $start
;   r1 = $pos
;   r2 = word loaded from [r0, #vp8_writer_buffer_end]
;   r3 = word loaded from [r0, #vp8_writer_error]
; r2 and r3 are loaded before the arguments are copied, and r1 is written
; before r0, so $pos must not be r2 or r3 and $start must not be r1, r2
; or r3.
; r0-r3, r12 and lr are pushed before the call and popped afterwards, so
; the caller sees them unchanged.
26 MACRO | |
27 VALIDATE_POS $start, $pos | |
28 push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call | |
; fetch the writer fields while r0 still points at the vp8_writer
29 ldr r2, [r0, #vp8_writer_buffer_end] | |
30 ldr r3, [r0, #vp8_writer_error] | |
; r1 first, then r0: r0 is only overwritten once it is no longer needed
31 mov r1, $pos | |
32 mov r0, $start | |
33 bl vp8_validate_buffer_arm | |
34 pop {r0-r3, r12, lr} | |
35 MEND | |
36 | |
37 ; r0 VP8_COMP *cpi | |
38 ; r1 unsigned char *cx_data | |
39 ; r2 const unsigned char *cx_data_end | |
40 ; r3 int num_part | |
41 ; s0 vp8_coef_encodings | |
42 ; s1 vp8_extra_bits, | |
43 ; s2 const vp8_tree_index * | |
;
; s0-s2 are the stack arguments; after the prologue below they are read
; from [sp, #80], [sp, #84] and [sp, #88].
;
; Writes the tokens of all macroblock rows into num_part partitions.
; Partition i uses the vp8_writer at
;   cpi + vp8_comp_bc + (i + 1) * vp8_writer_sz
; and consumes token lists i, i + num_part, i + 2 * num_part, ... of
; cpi->tp_list. Its output buffer starts at cx_data for i == 0 and at the
; previous partition's buffer + pos afterwards.
;
; Stack frame (after push {r4-r12, lr} and sub sp, sp, #40):
;   [sp, #0]   stop pointer of the current token list
;   [sp, #4]   b->tree for the extra-bits loop
;   [sp, #8]   cx_data_end
;   [sp, #12]  macroblock rows still to be handled in this partition
;   [sp, #16]  address of the current token list entry
;   [sp, #20]  num_part
;   [sp, #24]  ptr (start of the current partition's output)
;   [sp, #28]  i (partition index)
;   [sp, #32]  first token list entry of the current partition
;   [sp, #36]  mb_rows
;
; Register roles inside the encode loops:
;   r0 vp8_writer *   r1 p (current token)   r2 lowvalue
;   r3 count          r5 range               r8 n (bits left)
;   r9 pp             r10 tree               r12 v, left-aligned
;   lr i (tree index)
; lowvalue, range and count live only in r2, r5 and r3; the only writer
; fields this routine stores are pos, buffer and buffer_end.
44 | |
45 |vp8cx_pack_tokens_into_partitions_armv5| PROC | |
46 push {r4-r12, lr} | |
47 sub sp, sp, #40 | |
48 | |
49 ; Compute address of cpi->common.mb_rows | |
50 ldr r4, _VP8_COMP_common_ | |
51 ldr r6, _VP8_COMMON_MBrows_ | |
52 add r4, r0, r4 | |
53 | |
54 ldr r5, [r4, r6] ; load up mb_rows | |
55 | |
56 str r5, [sp, #36] ; save mb_rows | |
57 str r1, [sp, #24] ; save ptr = cx_data | |
58 str r3, [sp, #20] ; save num_part | |
59 str r2, [sp, #8] ; save cx_data_end | |
60 | |
61 ldr r4, _VP8_COMP_tplist_ | |
62 add r4, r0, r4 | |
63 ldr r7, [r4, #0] ; dereference cpi->tp_list | |
64 str r7, [sp, #32] ; store start of cpi->tp_list | |
65 | |
66 ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi | |
67 add r0, r0, r11 | |
68 | |
69 mov r11, #0 | |
70 str r11, [sp, #28] ; i | |
71 | |
; On entry to numparts_loop: r0 = writer used by the previous iteration
; (or cpi + vp8_comp_bc the first time), r7 = first token list entry of
; partition i, r11 = i.
72 numparts_loop | |
73 ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer) | |
74 add r0, r2 ; bc[i + 1] | |
75 | |
76 ldr r10, [sp, #24] ; ptr | |
77 ldr r5, [sp, #36] ; move mb_rows to the counting section | |
78 subs r5, r5, r11 ; move start point with each partition | |
79 ; mb_rows starts at i | |
80 str r5, [sp, #12] | |
81 | |
82 ; Reset all of the VP8 Writer data for each partition that | |
83 ; is processed. | |
84 ; start_encode | |
85 | |
86 ldr r3, [sp, #8] | |
87 str r3, [r0, #vp8_writer_buffer_end] | |
88 | |
89 mov r2, #0 ; vp8_writer_lowvalue | |
90 mov r5, #255 ; vp8_writer_range | |
91 mvn r3, #23 ; vp8_writer_count | |
92 | |
93 str r2, [r0, #vp8_writer_pos] | |
94 str r10, [r0, #vp8_writer_buffer] | |
95 | |
; The flags tested here are still those of the subs at row 78; none of
; the instructions in between updates them.
96 ble end_partition ; if (mb_rows <= 0) end partition | |
97 | |
98 mb_row_loop | |
99 | |
100 ldr r1, [r7, #tokenlist_start] | |
101 ldr r9, [r7, #tokenlist_stop] | |
102 str r9, [sp, #0] ; save stop for later comparison | |
103 str r7, [sp, #16] ; tokenlist address for next time | |
104 | |
105 b check_p_lt_stop | |
106 | |
107 ; actual work gets done here! | |
108 | |
109 while_p_lt_stop | |
110 ldrb r6, [r1, #tokenextra_token] ; t | |
111 ldr r4, [sp, #80] ; vp8_coef_encodings | |
112 mov lr, #0 | |
113 add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t | |
114 ldr r9, [r1, #tokenextra_context_tree] ; pp | |
115 | |
116 ldrb r7, [r1, #tokenextra_skip_eob_node] | |
117 | |
118 ldr r6, [r4, #vp8_token_value] ; v | |
119 ldr r8, [r4, #vp8_token_len] ; n | |
120 | |
121 ; vp8 specific skip_eob_node | |
122 cmp r7, #0 | |
123 movne lr, #2 ; i = 2 | |
124 subne r8, r8, #1 ; --n | |
125 | |
126 rsb r4, r8, #32 ; 32-n | |
127 ldr r10, [sp, #88] ; vp8_coef_tree | |
128 | |
129 ; v is kept in r12 during the token pack loop | |
130 lsl r12, r6, r4 ; r12 = v << 32 - n | |
131 | |
132 ; loop start | |
133 token_loop | |
134 ldrb r4, [r9, lr, asr #1] ; pp [i>>1] | |
135 sub r7, r5, #1 ; range-1 | |
136 | |
137 ; Decisions are made based on the bit value shifted | |
138 ; off of v, so set a flag here based on this. | |
139 ; This value is referred to as "bb" | |
140 lsls r12, r12, #1 ; bb = v >> n | |
141 mul r6, r4, r7 ; ((range-1) * pp[i>>1])) | |
142 | |
143 ; bb can only be 0 or 1. So only execute this statement | |
144 ; if bb == 1, otherwise it will act like i + 0 | |
145 addcs lr, lr, #1 ; i + bb | |
146 | |
147 mov r7, #1 | |
148 ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] | |
149 add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) | |
150 | |
; carry is still bb from the lsls at row 140
151 addcs r2, r2, r4 ; if (bb) lowvalue += split | |
152 subcs r4, r5, r4 ; if (bb) range = range-split | |
153 | |
154 ; Counting the leading zeros is used to normalize range. | |
155 clz r6, r4 | |
156 sub r6, r6, #24 ; shift | |
157 | |
158 ; Flag is set on the sum of count. This flag is used later | |
159 ; to determine if count >= 0 | |
160 adds r3, r3, r6 ; count += shift | |
161 lsl r5, r4, r6 ; range <<= shift | |
; the branch skips the byte output below; it runs only when count >= 0
162 bmi token_count_lt_zero ; if(count >= 0) | |
163 | |
164 sub r6, r6, r3 ; offset = shift - count | |
165 sub r4, r6, #1 ; offset-1 | |
166 lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000) | |
167 bpl token_high_bit_not_set | |
168 | |
; carry propagation: zero every trailing 0xff byte, then add 1 to the
; byte in front of them
169 ldr r4, [r0, #vp8_writer_pos] ; x | |
170 sub r4, r4, #1 ; x = w->pos-1 | |
171 b token_zero_while_start | |
172 token_zero_while_loop | |
173 mov r10, #0 | |
174 strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 | |
175 sub r4, r4, #1 ; x-- | |
176 token_zero_while_start | |
; NOTE(review): the ldrb at row 179 is unconditional. When x < 0 the
; ldrge is skipped, so it reads through whatever r7 currently holds; the
; loaded value is then ignored (cmpge is skipped and beq is not taken).
; The same pattern recurs in the other three carry loops of this
; routine. Confirm x < 0 cannot occur here, or make the load conditional.
177 cmp r4, #0 | |
178 ldrge r7, [r0, #vp8_writer_buffer] | |
179 ldrb r11, [r7, r4] | |
180 cmpge r11, #0xff | |
181 beq token_zero_while_loop | |
182 | |
183 ldr r7, [r0, #vp8_writer_buffer] | |
184 ldrb r10, [r7, r4] ; w->buffer[x] | |
185 add r10, r10, #1 | |
186 strb r10, [r7, r4] ; w->buffer[x] + 1 | |
187 token_high_bit_not_set | |
188 rsb r4, r6, #24 ; 24-offset | |
189 ldr r10, [r0, #vp8_writer_buffer] | |
190 lsr r7, r2, r4 ; lowvalue >> (24-offset) | |
191 ldr r4, [r0, #vp8_writer_pos] ; w->pos | |
192 lsl r2, r2, r6 ; lowvalue <<= offset | |
193 mov r6, r3 ; shift = count | |
194 add r11, r4, #1 ; w->pos++ | |
195 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff | |
196 str r11, [r0, #vp8_writer_pos] | |
197 sub r3, r3, #8 ; count -= 8 | |
198 | |
199 VALIDATE_POS r10, r11 ; validate_buffer at pos | |
200 | |
201 strb r7, [r10, r4] ; w->buffer[w->pos++] | |
202 | |
203 ; r10 is used earlier in the loop, but r10 is used as | |
204 ; temp variable here. So after r10 is used, reload | |
205 ; vp8_coef_tree_dcd into r10 | |
206 ldr r10, [sp, #88] ; vp8_coef_tree | |
207 | |
208 token_count_lt_zero | |
209 lsl r2, r2, r6 ; lowvalue <<= shift | |
210 | |
211 subs r8, r8, #1 ; --n | |
212 bne token_loop | |
213 | |
214 ldrb r6, [r1, #tokenextra_token] ; t | |
215 ldr r7, [sp, #84] ; vp8_extra_bits | |
216 ; Add t * sizeof (vp8_extra_bit_struct) to get the desired | |
217 ; element. Here vp8_extra_bit_struct == 16 | |
218 add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t | |
219 | |
220 ldr r4, [r12, #vp8_extra_bit_struct_base_val] | |
221 cmp r4, #0 | |
222 beq skip_extra_bits | |
223 | |
224 ; if( b->base_val) | |
225 ldr r8, [r12, #vp8_extra_bit_struct_len] ; L | |
226 ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra | |
227 cmp r8, #0 ; if( L) | |
228 beq no_extra_bits | |
229 | |
230 ldr r9, [r12, #vp8_extra_bit_struct_prob] | |
231 asr r7, lr, #1 ; v=e>>1 | |
232 | |
233 ldr r10, [r12, #vp8_extra_bit_struct_tree] | |
234 str r10, [sp, #4] ; b->tree | |
235 | |
236 rsb r4, r8, #32 | |
237 lsl r12, r7, r4 | |
238 | |
239 mov lr, #0 ; i = 0 | |
240 | |
; same bit-by-bit tree walk as token_loop, using b->prob and b->tree
241 extra_bits_loop | |
242 ldrb r4, [r9, lr, asr #1] ; pp[i>>1] | |
243 sub r7, r5, #1 ; range-1 | |
244 lsls r12, r12, #1 ; v >> n | |
245 mul r6, r4, r7 ; (range-1) * pp[i>>1] | |
246 addcs lr, lr, #1 ; i + bb | |
247 | |
248 mov r7, #1 | |
249 ldrsb lr, [r10, lr] ; i = b->tree[i+bb] | |
250 add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) | |
251 | |
252 addcs r2, r2, r4 ; if (bb) lowvalue += split | |
253 subcs r4, r5, r4 ; if (bb) range = range-split | |
254 | |
255 clz r6, r4 | |
256 sub r6, r6, #24 | |
257 | |
258 adds r3, r3, r6 ; count += shift | |
259 lsl r5, r4, r6 ; range <<= shift | |
260 bmi extra_count_lt_zero ; if(count >= 0) | |
261 | |
262 sub r6, r6, r3 ; offset= shift - count | |
263 sub r4, r6, #1 ; offset-1 | |
264 lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000) | |
265 bpl extra_high_bit_not_set | |
266 | |
267 ldr r4, [r0, #vp8_writer_pos] ; x | |
268 sub r4, r4, #1 ; x = w->pos - 1 | |
269 b extra_zero_while_start | |
270 extra_zero_while_loop | |
271 mov r10, #0 | |
272 strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 | |
273 sub r4, r4, #1 ; x-- | |
274 extra_zero_while_start | |
275 cmp r4, #0 | |
276 ldrge r7, [r0, #vp8_writer_buffer] | |
277 ldrb r11, [r7, r4] | |
278 cmpge r11, #0xff | |
279 beq extra_zero_while_loop | |
280 | |
281 ldr r7, [r0, #vp8_writer_buffer] | |
282 ldrb r10, [r7, r4] | |
283 add r10, r10, #1 | |
284 strb r10, [r7, r4] | |
285 extra_high_bit_not_set | |
286 rsb r4, r6, #24 ; 24-offset | |
287 ldr r10, [r0, #vp8_writer_buffer] | |
288 lsr r7, r2, r4 ; lowvalue >> (24-offset) | |
289 ldr r4, [r0, #vp8_writer_pos] | |
290 lsl r2, r2, r6 ; lowvalue <<= offset | |
291 mov r6, r3 ; shift = count | |
292 add r11, r4, #1 ; w->pos++ | |
293 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff | |
294 str r11, [r0, #vp8_writer_pos] | |
295 sub r3, r3, #8 ; count -= 8 | |
296 | |
297 VALIDATE_POS r10, r11 ; validate_buffer at pos | |
298 | |
299 strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) | |
; r10 was used as a temporary above; restore the tree pointer
300 ldr r10, [sp, #4] ; b->tree | |
301 extra_count_lt_zero | |
302 lsl r2, r2, r6 | |
303 | |
304 subs r8, r8, #1 ; --n | |
305 bne extra_bits_loop ; while (n) | |
306 | |
; Write the low bit of e with split = (range + 1) >> 1.
307 no_extra_bits | |
; Extra is read as a halfword at its named offset, the same way as at
; row 226, instead of as a word at a hard-coded offset; only bit 0 is
; tested below.
308 ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra | |
309 add r4, r5, #1 ; range + 1 | |
310 tst lr, #1 | |
311 lsr r4, r4, #1 ; split = (range + 1) >> 1 | |
312 addne r2, r2, r4 ; lowvalue += split | |
313 subne r4, r5, r4 ; range = range-split | |
314 tst r2, #0x80000000 ; lowvalue & 0x80000000 | |
315 lsl r5, r4, #1 ; range <<= 1 | |
316 beq end_high_bit_not_set | |
317 | |
318 ldr r4, [r0, #vp8_writer_pos] | |
319 mov r7, #0 | |
320 sub r4, r4, #1 | |
321 b end_zero_while_start | |
322 end_zero_while_loop | |
323 strb r7, [r6, r4] | |
324 sub r4, r4, #1 ; x-- | |
325 end_zero_while_start | |
326 cmp r4, #0 | |
327 ldrge r6, [r0, #vp8_writer_buffer] | |
328 ldrb r12, [r6, r4] | |
329 cmpge r12, #0xff | |
330 beq end_zero_while_loop | |
331 | |
332 ldr r6, [r0, #vp8_writer_buffer] | |
333 ldrb r7, [r6, r4] | |
334 add r7, r7, #1 | |
335 strb r7, [r6, r4] | |
336 end_high_bit_not_set | |
337 adds r3, r3, #1 ; ++count | |
338 lsl r2, r2, #1 ; lowvalue <<= 1 | |
; lsl without the s suffix leaves the flags of the adds in place
339 bne end_count_zero | |
340 | |
341 ldr r4, [r0, #vp8_writer_pos] | |
342 mvn r3, #7 ; count = -8 | |
343 ldr r7, [r0, #vp8_writer_buffer] | |
344 lsr r6, r2, #24 ; lowvalue >> 24 | |
345 add r12, r4, #1 ; w->pos++ | |
346 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff | |
347 str r12, [r0, #vp8_writer_pos] | |
348 | |
349 VALIDATE_POS r7, r12 ; validate_buffer at pos | |
350 | |
351 strb r6, [r7, r4] | |
352 end_count_zero | |
353 skip_extra_bits | |
354 add r1, r1, #TOKENEXTRA_SZ ; ++p | |
355 check_p_lt_stop | |
356 ldr r4, [sp, #0] ; stop | |
357 cmp r1, r4 ; while( p < stop) | |
358 bcc while_p_lt_stop | |
359 | |
; advance by num_parts token lists and num_parts macroblock rows
360 ldr r10, [sp, #20] ; num_parts | |
361 mov r1, #TOKENLIST_SZ | |
362 mul r1, r10, r1 | |
363 | |
364 ldr r6, [sp, #12] ; mb_rows | |
365 ldr r7, [sp, #16] ; tokenlist address | |
366 subs r6, r6, r10 | |
367 add r7, r7, r1 ; next element in the array | |
368 str r6, [sp, #12] | |
; flags are those of the subs at row 366
369 bgt mb_row_loop | |
370 | |
; stop_encode: run the writer 32 more times with probability 128 and the
; lowvalue-unchanged branch only (no split is added to lowvalue).
371 end_partition | |
372 mov r12, #32 | |
373 | |
374 stop_encode_loop | |
375 sub r7, r5, #1 ; range-1 | |
376 | |
377 mov r4, r7, lsl #7 ; ((range-1) * 128) | |
378 | |
379 mov r7, #1 | |
380 add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) | |
381 | |
382 ; Counting the leading zeros is used to normalize range. | |
383 clz r6, r4 | |
384 sub r6, r6, #24 ; shift | |
385 | |
386 ; Flag is set on the sum of count. This flag is used later | |
387 ; to determine if count >= 0 | |
388 adds r3, r3, r6 ; count += shift | |
389 lsl r5, r4, r6 ; range <<= shift | |
390 bmi token_count_lt_zero_se ; if(count >= 0) | |
391 | |
392 sub r6, r6, r3 ; offset = shift - count | |
393 sub r4, r6, #1 ; offset-1 | |
394 lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000) | |
395 bpl token_high_bit_not_set_se | |
396 | |
397 ldr r4, [r0, #vp8_writer_pos] ; x | |
398 sub r4, r4, #1 ; x = w->pos-1 | |
399 b token_zero_while_start_se | |
400 token_zero_while_loop_se | |
401 mov r10, #0 | |
402 strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 | |
403 sub r4, r4, #1 ; x-- | |
404 token_zero_while_start_se | |
405 cmp r4, #0 | |
406 ldrge r7, [r0, #vp8_writer_buffer] | |
407 ldrb r11, [r7, r4] | |
408 cmpge r11, #0xff | |
409 beq token_zero_while_loop_se | |
410 | |
411 ldr r7, [r0, #vp8_writer_buffer] | |
412 ldrb r10, [r7, r4] ; w->buffer[x] | |
413 add r10, r10, #1 | |
414 strb r10, [r7, r4] ; w->buffer[x] + 1 | |
415 token_high_bit_not_set_se | |
416 rsb r4, r6, #24 ; 24-offset | |
417 ldr r10, [r0, #vp8_writer_buffer] | |
418 lsr r7, r2, r4 ; lowvalue >> (24-offset) | |
419 ldr r4, [r0, #vp8_writer_pos] ; w->pos | |
420 lsl r2, r2, r6 ; lowvalue <<= offset | |
421 mov r6, r3 ; shift = count | |
422 add r11, r4, #1 ; w->pos++ | |
423 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff | |
424 str r11, [r0, #vp8_writer_pos] | |
425 sub r3, r3, #8 ; count -= 8 | |
426 | |
427 VALIDATE_POS r10, r11 ; validate_buffer at pos | |
428 | |
429 strb r7, [r10, r4] ; w->buffer[w->pos++] | |
430 | |
431 token_count_lt_zero_se | |
432 lsl r2, r2, r6 ; lowvalue <<= shift | |
433 | |
434 subs r12, r12, #1 | |
435 bne stop_encode_loop | |
436 | |
; the next partition's output starts right after this one's
437 ldr r4, [r0, #vp8_writer_pos] ; w->pos | |
438 ldr r12, [sp, #24] ; ptr | |
439 add r12, r12, r4 ; ptr += w->pos | |
440 str r12, [sp, #24] | |
441 | |
442 ldr r11, [sp, #28] ; i | |
443 ldr r10, [sp, #20] ; num_parts | |
444 | |
445 add r11, r11, #1 ; i++ | |
446 str r11, [sp, #28] | |
447 | |
448 ldr r7, [sp, #32] ; cpi->tp_list[i] | |
449 mov r1, #TOKENLIST_SZ | |
450 add r7, r7, r1 ; next element in cpi->tp_list | |
451 str r7, [sp, #32] ; cpi->tp_list[i+1] | |
452 | |
; loop while i < num_parts (signed compare); r7 and r11 are already set
; up for the next pass
453 cmp r10, r11 | |
454 bgt numparts_loop | |
455 | |
456 add sp, sp, #40 | |
457 pop {r4-r12, pc} | |
458 ENDP | |
459 | |
; Literal words read with "ldr rX, <label>" by
; vp8cx_pack_tokens_into_partitions_armv5. Each holds one structure
; offset or size constant.
; NOTE(review): the vp8_* symbols are not defined in this file; presumably
; they come from the included vp8_asm_enc_offsets.asm - confirm.
460 _VP8_COMP_common_ | |
461 DCD vp8_comp_common | |
462 _VP8_COMMON_MBrows_ | |
463 DCD vp8_common_mb_rows | |
464 _VP8_COMP_tplist_ | |
465 DCD vp8_comp_tplist | |
466 _VP8_COMP_bc_ | |
467 DCD vp8_comp_bc | |
468 _vp8_writer_sz_ | |
469 DCD vp8_writer_sz | |
470 | |
471 END | |
OLD | NEW |