Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(485)

Side by Side Diff: xz/src/liblzma/lzma/lzma_decoder.c

Issue 2869016: Add an unpatched version of xz, XZ Utils, to /trunk/deps/third_party (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/
Patch Set: Created 10 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « xz/src/liblzma/lzma/lzma_decoder.h ('k') | xz/src/liblzma/lzma/lzma_encoder.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file lzma_decoder.c
4 /// \brief LZMA decoder
5 ///
6 // Authors: Igor Pavlov
7 // Lasse Collin
8 //
9 // This file has been put into the public domain.
10 // You can do whatever you want with this file.
11 //
12 ///////////////////////////////////////////////////////////////////////////////
13
14 #include "lz_decoder.h"
15 #include "lzma_common.h"
16 #include "lzma_decoder.h"
17 #include "range_decoder.h"
18
19
20 #ifdef HAVE_SMALL
21
22 // Macros for (somewhat) size-optimized code.
23 #define seq_4(seq) seq
24
25 #define seq_6(seq) seq
26
27 #define seq_8(seq) seq
28
29 #define seq_len(seq) \
30 seq ## _CHOICE, \
31 seq ## _CHOICE2, \
32 seq ## _BITTREE
33
34 #define len_decode(target, ld, pos_state, seq) \
35 do { \
36 case seq ## _CHOICE: \
37 rc_if_0(ld.choice, seq ## _CHOICE) { \
38 rc_update_0(ld.choice); \
39 probs = ld.low[pos_state];\
40 limit = LEN_LOW_SYMBOLS; \
41 target = MATCH_LEN_MIN; \
42 } else { \
43 rc_update_1(ld.choice); \
44 case seq ## _CHOICE2: \
45 rc_if_0(ld.choice2, seq ## _CHOICE2) { \
46 rc_update_0(ld.choice2); \
47 probs = ld.mid[pos_state]; \
48 limit = LEN_MID_SYMBOLS; \
49 target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
50 } else { \
51 rc_update_1(ld.choice2); \
52 probs = ld.high; \
53 limit = LEN_HIGH_SYMBOLS; \
54 target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
55 + LEN_MID_SYMBOLS; \
56 } \
57 } \
58 symbol = 1; \
59 case seq ## _BITTREE: \
60 do { \
61 rc_bit(probs[symbol], , , seq ## _BITTREE); \
62 } while (symbol < limit); \
63 target += symbol - limit; \
64 } while (0)
65
66 #else // HAVE_SMALL
67
68 // Unrolled versions
69 #define seq_4(seq) \
70 seq ## 0, \
71 seq ## 1, \
72 seq ## 2, \
73 seq ## 3
74
75 #define seq_6(seq) \
76 seq ## 0, \
77 seq ## 1, \
78 seq ## 2, \
79 seq ## 3, \
80 seq ## 4, \
81 seq ## 5
82
83 #define seq_8(seq) \
84 seq ## 0, \
85 seq ## 1, \
86 seq ## 2, \
87 seq ## 3, \
88 seq ## 4, \
89 seq ## 5, \
90 seq ## 6, \
91 seq ## 7
92
93 #define seq_len(seq) \
94 seq ## _CHOICE, \
95 seq ## _LOW0, \
96 seq ## _LOW1, \
97 seq ## _LOW2, \
98 seq ## _CHOICE2, \
99 seq ## _MID0, \
100 seq ## _MID1, \
101 seq ## _MID2, \
102 seq ## _HIGH0, \
103 seq ## _HIGH1, \
104 seq ## _HIGH2, \
105 seq ## _HIGH3, \
106 seq ## _HIGH4, \
107 seq ## _HIGH5, \
108 seq ## _HIGH6, \
109 seq ## _HIGH7
110
111 #define len_decode(target, ld, pos_state, seq) \
112 do { \
113 symbol = 1; \
114 case seq ## _CHOICE: \
115 rc_if_0(ld.choice, seq ## _CHOICE) { \
116 rc_update_0(ld.choice); \
117 rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
118 rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
119 rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
120 target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
121 } else { \
122 rc_update_1(ld.choice); \
123 case seq ## _CHOICE2: \
124 rc_if_0(ld.choice2, seq ## _CHOICE2) { \
125 rc_update_0(ld.choice2); \
126 rc_bit_case(ld.mid[pos_state][symbol], , , \
127 seq ## _MID0); \
128 rc_bit_case(ld.mid[pos_state][symbol], , , \
129 seq ## _MID1); \
130 rc_bit_case(ld.mid[pos_state][symbol], , , \
131 seq ## _MID2); \
132 target = symbol - LEN_MID_SYMBOLS \
133 + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
134 } else { \
135 rc_update_1(ld.choice2); \
136 rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
137 rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
138 rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
139 rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
140 rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
141 rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
142 rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
143 rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
144 target = symbol - LEN_HIGH_SYMBOLS \
145 + MATCH_LEN_MIN \
146 + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
147 } \
148 } \
149 } while (0)
150
151 #endif // HAVE_SMALL
152
153
154 /// Length decoder probabilities; see comments in lzma_common.h.
155 typedef struct {
156 probability choice;
157 probability choice2;
158 probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
159 probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
160 probability high[LEN_HIGH_SYMBOLS];
161 } lzma_length_decoder;
162
163
164 struct lzma_coder_s {
165 ///////////////////
166 // Probabilities //
167 ///////////////////
168
169 /// Literals; see comments in lzma_common.h.
170 probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
171
172 /// If 1, it's a match. Otherwise it's a single 8-bit literal.
173 probability is_match[STATES][POS_STATES_MAX];
174
175 /// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
176 probability is_rep[STATES];
177
178 /// If 0, distance of a repeated match is rep0.
179 /// Otherwise check is_rep1.
180 probability is_rep0[STATES];
181
182 /// If 0, distance of a repeated match is rep1.
183 /// Otherwise check is_rep2.
184 probability is_rep1[STATES];
185
186 /// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
187 probability is_rep2[STATES];
188
189 /// If 1, the repeated match has length of one byte. Otherwise
190 /// the length is decoded from rep_len_decoder.
191 probability is_rep0_long[STATES][POS_STATES_MAX];
192
193 /// Probability tree for the highest two bits of the match distance.
194 /// There is a separate probability tree for match lengths of
195 /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
196 probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS];
197
198 /// Probability trees for additional bits for match distance when the
199 /// distance is in the range [4, 127].
200 probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX];
201
202 /// Probability tree for the lowest four bits of a match distance
203 /// that is equal to or greater than 128.
204 probability pos_align[ALIGN_TABLE_SIZE];
205
206 /// Length of a normal match
207 lzma_length_decoder match_len_decoder;
208
209 /// Length of a repeated match
210 lzma_length_decoder rep_len_decoder;
211
212 ///////////////////
213 // Decoder state //
214 ///////////////////
215
216 // Range coder
217 lzma_range_decoder rc;
218
219 // Types of the most recently seen LZMA symbols
220 lzma_lzma_state state;
221
222 uint32_t rep0; ///< Distance of the latest match
223 uint32_t rep1; ///< Distance of second latest match
224 uint32_t rep2; ///< Distance of third latest match
225 uint32_t rep3; ///< Distance of fourth latest match
226
227 uint32_t pos_mask; // (1U << pb) - 1
228 uint32_t literal_context_bits;
229 uint32_t literal_pos_mask;
230
231 /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
232 /// payload marker is expected.
233 lzma_vli uncompressed_size;
234
235 ////////////////////////////////
236 // State of incomplete symbol //
237 ////////////////////////////////
238
239 /// Position where to continue the decoder loop
240 enum {
241 SEQ_NORMALIZE,
242 SEQ_IS_MATCH,
243 seq_8(SEQ_LITERAL),
244 seq_8(SEQ_LITERAL_MATCHED),
245 SEQ_LITERAL_WRITE,
246 SEQ_IS_REP,
247 seq_len(SEQ_MATCH_LEN),
248 seq_6(SEQ_POS_SLOT),
249 SEQ_POS_MODEL,
250 SEQ_DIRECT,
251 seq_4(SEQ_ALIGN),
252 SEQ_EOPM,
253 SEQ_IS_REP0,
254 SEQ_SHORTREP,
255 SEQ_IS_REP0_LONG,
256 SEQ_IS_REP1,
257 SEQ_IS_REP2,
258 seq_len(SEQ_REP_LEN),
259 SEQ_COPY,
260 } sequence;
261
262 /// Base of the current probability tree
263 probability *probs;
264
265 /// Symbol being decoded. This is also used as an index variable in
266 /// bittree decoders: probs[symbol]
267 uint32_t symbol;
268
269 /// Used as a loop termination condition on bittree decoders and
270 /// direct bits decoder.
271 uint32_t limit;
272
273 /// Matched literal decoder: 0x100 or 0 to help avoiding branches.
274 /// Bittree reverse decoders: Offset of the next bit: 1 << offset
275 uint32_t offset;
276
277 /// If decoding a literal: match byte.
278 /// If decoding a match: length of the match.
279 uint32_t len;
280 };
281
282
283 static lzma_ret
284 lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,
285 const uint8_t *restrict in,
286 size_t *restrict in_pos, size_t in_size)
287 {
288 ////////////////////
289 // Initialization //
290 ////////////////////
291
292 if (!rc_read_init(&coder->rc, in, in_pos, in_size))
293 return LZMA_OK;
294
295 ///////////////
296 // Variables //
297 ///////////////
298
299 // Making local copies of often-used variables improves both
300 // speed and readability.
301
302 lzma_dict dict = *dictptr;
303
304 const size_t dict_start = dict.pos;
305
306 // Range decoder
307 rc_to_local(coder->rc, *in_pos);
308
309 // State
310 uint32_t state = coder->state;
311 uint32_t rep0 = coder->rep0;
312 uint32_t rep1 = coder->rep1;
313 uint32_t rep2 = coder->rep2;
314 uint32_t rep3 = coder->rep3;
315
316 const uint32_t pos_mask = coder->pos_mask;
317
318 // These variables are actually needed only if we last time ran
319 // out of input in the middle of the decoder loop.
320 probability *probs = coder->probs;
321 uint32_t symbol = coder->symbol;
322 uint32_t limit = coder->limit;
323 uint32_t offset = coder->offset;
324 uint32_t len = coder->len;
325
326 const uint32_t literal_pos_mask = coder->literal_pos_mask;
327 const uint32_t literal_context_bits = coder->literal_context_bits;
328
329 // Temporary variables
330 uint32_t pos_state = dict.pos & pos_mask;
331
332 lzma_ret ret = LZMA_OK;
333
334 // If uncompressed size is known, there must be no end of payload
335 // marker.
336 const bool no_eopm = coder->uncompressed_size
337 != LZMA_VLI_UNKNOWN;
338 if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
339 dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
340
341 // The main decoder loop. The "switch" is used to restart the decoder at
342 // correct location. Once restarted, the "switch" is no longer used.
343 switch (coder->sequence)
344 while (true) {
345 // Calculate new pos_state. This is skipped on the first loop
346 // since we already calculated it when setting up the local
347 // variables.
348 pos_state = dict.pos & pos_mask;
349
350 case SEQ_NORMALIZE:
351 case SEQ_IS_MATCH:
352 if (unlikely(no_eopm && dict.pos == dict.limit))
353 break;
354
355 rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
356 rc_update_0(coder->is_match[state][pos_state]);
357
358 // It's a literal i.e. a single 8-bit byte.
359
360 probs = literal_subcoder(coder->literal,
361 literal_context_bits, literal_pos_mask,
362 dict.pos, dict_get(&dict, 0));
363 symbol = 1;
364
365 if (is_literal_state(state)) {
366 // Decode literal without match byte.
367 #ifdef HAVE_SMALL
368 case SEQ_LITERAL:
369 do {
370 rc_bit(probs[symbol], , , SEQ_LITERAL);
371 } while (symbol < (1 << 8));
372 #else
373 rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
374 rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
375 rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
376 rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
377 rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
378 rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
379 rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
380 rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
381 #endif
382 } else {
383 // Decode literal with match byte.
384 //
385 // We store the byte we compare against
386 // ("match byte") to "len" to minimize the
387 // number of variables we need to store
388 // between decoder calls.
389 len = dict_get(&dict, rep0) << 1;
390
391 // The usage of "offset" allows omitting some
392 // branches, which should give tiny speed
393 // improvement on some CPUs. "offset" gets
394 // set to zero if match_bit didn't match.
395 offset = 0x100;
396
397 #ifdef HAVE_SMALL
398 case SEQ_LITERAL_MATCHED:
399 do {
400 const uint32_t match_bit
401 = len & offset;
402 const uint32_t subcoder_index
403 = offset + match_bit
404 + symbol;
405
406 rc_bit(probs[subcoder_index],
407 offset &= ~match_bit,
408 offset &= match_bit,
409 SEQ_LITERAL_MATCHED);
410
411 // It seems to be faster to do this
412 // here instead of putting it to the
413 // beginning of the loop and then
414 // putting the "case" in the middle
415 // of the loop.
416 len <<= 1;
417
418 } while (symbol < (1 << 8));
419 #else
420 // Unroll the loop.
421 uint32_t match_bit;
422 uint32_t subcoder_index;
423
424 # define d(seq) \
425 case seq: \
426 match_bit = len & offset; \
427 subcoder_index = offset + match_bit + symbol; \
428 rc_bit(probs[subcoder_index], \
429 offset &= ~match_bit, \
430 offset &= match_bit, \
431 seq)
432
433 d(SEQ_LITERAL_MATCHED0);
434 len <<= 1;
435 d(SEQ_LITERAL_MATCHED1);
436 len <<= 1;
437 d(SEQ_LITERAL_MATCHED2);
438 len <<= 1;
439 d(SEQ_LITERAL_MATCHED3);
440 len <<= 1;
441 d(SEQ_LITERAL_MATCHED4);
442 len <<= 1;
443 d(SEQ_LITERAL_MATCHED5);
444 len <<= 1;
445 d(SEQ_LITERAL_MATCHED6);
446 len <<= 1;
447 d(SEQ_LITERAL_MATCHED7);
448 # undef d
449 #endif
450 }
451
452 //update_literal(state);
453 // Use a lookup table to update to literal state,
454 // since compared to other state updates, this would
455 // need two branches.
456 static const lzma_lzma_state next_state[] = {
457 STATE_LIT_LIT,
458 STATE_LIT_LIT,
459 STATE_LIT_LIT,
460 STATE_LIT_LIT,
461 STATE_MATCH_LIT_LIT,
462 STATE_REP_LIT_LIT,
463 STATE_SHORTREP_LIT_LIT,
464 STATE_MATCH_LIT,
465 STATE_REP_LIT,
466 STATE_SHORTREP_LIT,
467 STATE_MATCH_LIT,
468 STATE_REP_LIT
469 };
470 state = next_state[state];
471
472 case SEQ_LITERAL_WRITE:
473 if (unlikely(dict_put(&dict, symbol))) {
474 coder->sequence = SEQ_LITERAL_WRITE;
475 goto out;
476 }
477
478 continue;
479 }
480
481 // Instead of a new byte we are going to get a byte range
482 // (distance and length) which will be repeated from our
483 // output history.
484
485 rc_update_1(coder->is_match[state][pos_state]);
486
487 case SEQ_IS_REP:
488 rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
489 // Not a repeated match
490 rc_update_0(coder->is_rep[state]);
491 update_match(state);
492
493 // The latest three match distances are kept in
494 // memory in case there are repeated matches.
495 rep3 = rep2;
496 rep2 = rep1;
497 rep1 = rep0;
498
499 // Decode the length of the match.
500 len_decode(len, coder->match_len_decoder,
501 pos_state, SEQ_MATCH_LEN);
502
503 // Prepare to decode the highest two bits of the
504 // match distance.
505 probs = coder->pos_slot[get_len_to_pos_state(len)];
506 symbol = 1;
507
508 #ifdef HAVE_SMALL
509 case SEQ_POS_SLOT:
510 do {
511 rc_bit(probs[symbol], , , SEQ_POS_SLOT);
512 } while (symbol < POS_SLOTS);
513 #else
514 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0);
515 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1);
516 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2);
517 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3);
518 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4);
519 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5);
520 #endif
521 // Get rid of the highest bit that was needed for
522 // indexing of the probability array.
523 symbol -= POS_SLOTS;
524 assert(symbol <= 63);
525
526 if (symbol < START_POS_MODEL_INDEX) {
527 // Match distances [0, 3] have only two bits.
528 rep0 = symbol;
529 } else {
530 // Decode the lowest [1, 29] bits of
531 // the match distance.
532 limit = (symbol >> 1) - 1;
533 assert(limit >= 1 && limit <= 30);
534 rep0 = 2 + (symbol & 1);
535
536 if (symbol < END_POS_MODEL_INDEX) {
537 // Prepare to decode the low bits for
538 // a distance of [4, 127].
539 assert(limit <= 5);
540 rep0 <<= limit;
541 assert(rep0 <= 96);
542 // -1 is fine, because we start
543 // decoding at probs[1], not probs[0].
544 // NOTE: This violates the C standard,
545 // since we are doing pointer
546 // arithmetic past the beginning of
547 // the array.
548 assert((int32_t)(rep0 - symbol - 1)
549 >= -1);
550 assert((int32_t)(rep0 - symbol - 1)
551 <= 82);
552 probs = coder->pos_special + rep0
553 - symbol - 1;
554 symbol = 1;
555 offset = 0;
556 case SEQ_POS_MODEL:
557 #ifdef HAVE_SMALL
558 do {
559 rc_bit(probs[symbol], ,
560 rep0 += 1 << offset,
561 SEQ_POS_MODEL);
562 } while (++offset < limit);
563 #else
564 switch (limit) {
565 case 5:
566 assert(offset == 0);
567 rc_bit(probs[symbol], ,
568 rep0 += 1,
569 SEQ_POS_MODEL);
570 ++offset;
571 --limit;
572 case 4:
573 rc_bit(probs[symbol], ,
574 rep0 += 1 << offset,
575 SEQ_POS_MODEL);
576 ++offset;
577 --limit;
578 case 3:
579 rc_bit(probs[symbol], ,
580 rep0 += 1 << offset,
581 SEQ_POS_MODEL);
582 ++offset;
583 --limit;
584 case 2:
585 rc_bit(probs[symbol], ,
586 rep0 += 1 << offset,
587 SEQ_POS_MODEL);
588 ++offset;
589 --limit;
590 case 1:
591 // We need "symbol" only for
592 // indexing the probability
593 // array, thus we can use
594 // rc_bit_last() here to omit
595 // the unneeded updating of
596 // "symbol".
597 rc_bit_last(probs[symbol], ,
598 rep0 += 1 << offset,
599 SEQ_POS_MODEL);
600 }
601 #endif
602 } else {
603 // The distance is >= 128. Decode the
604 // lower bits without probabilities
605 // except the lowest four bits.
606 assert(symbol >= 14);
607 assert(limit >= 6);
608 limit -= ALIGN_BITS;
609 assert(limit >= 2);
610 case SEQ_DIRECT:
611 // Not worth manual unrolling
612 do {
613 rc_direct(rep0, SEQ_DIRECT);
614 } while (--limit > 0);
615
616 // Decode the lowest four bits using
617 // probabilities.
618 rep0 <<= ALIGN_BITS;
619 symbol = 1;
620 #ifdef HAVE_SMALL
621 offset = 0;
622 case SEQ_ALIGN:
623 do {
624 rc_bit(coder->pos_align[
625 symbol], ,
626 rep0 += 1 << offset,
627 SEQ_ALIGN);
628 } while (++offset < ALIGN_BITS);
629 #else
630 case SEQ_ALIGN0:
631 rc_bit(coder->pos_align[symbol], ,
632 rep0 += 1, SEQ_ALIGN0);
633 case SEQ_ALIGN1:
634 rc_bit(coder->pos_align[symbol], ,
635 rep0 += 2, SEQ_ALIGN1);
636 case SEQ_ALIGN2:
637 rc_bit(coder->pos_align[symbol], ,
638 rep0 += 4, SEQ_ALIGN2);
639 case SEQ_ALIGN3:
640 // Like in SEQ_POS_MODEL, we don't
641 // need "symbol" for anything else
642 // than indexing the probability array.
643 rc_bit_last(coder->pos_align[symbol], ,
644 rep0 += 8, SEQ_ALIGN3);
645 #endif
646
647 if (rep0 == UINT32_MAX) {
648 // End of payload marker was
649 // found. It must not be
650 // present if uncompressed
651 // size is known.
652 if (coder->uncompressed_size
653 != LZMA_VLI_UNKNOWN) {
654 ret = LZMA_DATA_ERROR;
655 goto out;
656 }
657
658 case SEQ_EOPM:
659 // TODO Comment
660 rc_normalize(SEQ_EOPM);
661 ret = LZMA_STREAM_END;
662 goto out;
663 }
664 }
665 }
666
667 // Validate the distance we just decoded.
668 if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
669 ret = LZMA_DATA_ERROR;
670 goto out;
671 }
672
673 } else {
674 rc_update_1(coder->is_rep[state]);
675
676 // Repeated match
677 //
678 // The match distance is a value that we have had
679 // earlier. The latest four match distances are
680 // available as rep0, rep1, rep2 and rep3. We will
681 // now decode which of them is the new distance.
682 //
683 // There cannot be a match if we haven't produced
684 // any output, so check that first.
685 if (unlikely(!dict_is_distance_valid(&dict, 0))) {
686 ret = LZMA_DATA_ERROR;
687 goto out;
688 }
689
690 case SEQ_IS_REP0:
691 rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
692 rc_update_0(coder->is_rep0[state]);
693 // The distance is rep0.
694
695 case SEQ_IS_REP0_LONG:
696 rc_if_0(coder->is_rep0_long[state][pos_state],
697 SEQ_IS_REP0_LONG) {
698 rc_update_0(coder->is_rep0_long[
699 state][pos_state]);
700
701 update_short_rep(state);
702
703 case SEQ_SHORTREP:
704 if (unlikely(dict_put(&dict, dict_get(
705 &dict, rep0)))) {
706 coder->sequence = SEQ_SHORTREP;
707 goto out;
708 }
709
710 continue;
711 }
712
713 // Repeating more than one byte at
714 // distance of rep0.
715 rc_update_1(coder->is_rep0_long[
716 state][pos_state]);
717
718 } else {
719 rc_update_1(coder->is_rep0[state]);
720
721 case SEQ_IS_REP1:
722 // The distance is rep1, rep2 or rep3. Once
723 // we find out which one of these three, it
724 // is stored to rep0 and rep1, rep2 and rep3
725 // are updated accordingly.
726 rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
727 rc_update_0(coder->is_rep1[state]);
728
729 const uint32_t distance = rep1;
730 rep1 = rep0;
731 rep0 = distance;
732
733 } else {
734 rc_update_1(coder->is_rep1[state]);
735 case SEQ_IS_REP2:
736 rc_if_0(coder->is_rep2[state],
737 SEQ_IS_REP2) {
738 rc_update_0(coder->is_rep2[
739 state]);
740
741 const uint32_t distance = rep2;
742 rep2 = rep1;
743 rep1 = rep0;
744 rep0 = distance;
745
746 } else {
747 rc_update_1(coder->is_rep2[
748 state]);
749
750 const uint32_t distance = rep3;
751 rep3 = rep2;
752 rep2 = rep1;
753 rep1 = rep0;
754 rep0 = distance;
755 }
756 }
757 }
758
759 update_long_rep(state);
760
761 // Decode the length of the repeated match.
762 len_decode(len, coder->rep_len_decoder,
763 pos_state, SEQ_REP_LEN);
764 }
765
766 /////////////////////////////////
767 // Repeat from history buffer. //
768 /////////////////////////////////
769
770 // The length is always between these limits. There is no way
771 // to trigger the algorithm to set len outside this range.
772 assert(len >= MATCH_LEN_MIN);
773 assert(len <= MATCH_LEN_MAX);
774
775 case SEQ_COPY:
776 // Repeat len bytes from distance of rep0.
777 if (unlikely(dict_repeat(&dict, rep0, &len))) {
778 coder->sequence = SEQ_COPY;
779 goto out;
780 }
781 }
782
783 rc_normalize(SEQ_NORMALIZE);
784 coder->sequence = SEQ_IS_MATCH;
785
786 out:
787 // Save state
788
789 // NOTE: Must not copy dict.limit.
790 dictptr->pos = dict.pos;
791 dictptr->full = dict.full;
792
793 rc_from_local(coder->rc, *in_pos);
794
795 coder->state = state;
796 coder->rep0 = rep0;
797 coder->rep1 = rep1;
798 coder->rep2 = rep2;
799 coder->rep3 = rep3;
800
801 coder->probs = probs;
802 coder->symbol = symbol;
803 coder->limit = limit;
804 coder->offset = offset;
805 coder->len = len;
806
807 // Update the remaining amount of uncompressed data if uncompressed
808 // size was known.
809 if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
810 coder->uncompressed_size -= dict.pos - dict_start;
811
812 // Since there cannot be end of payload marker if the
813 // uncompressed size was known, we check here if we
814 // finished decoding.
815 if (coder->uncompressed_size == 0 && ret == LZMA_OK
816 && coder->sequence != SEQ_NORMALIZE)
817 ret = coder->sequence == SEQ_IS_MATCH
818 ? LZMA_STREAM_END : LZMA_DATA_ERROR;
819 }
820
821 // We can do an additional check in the range decoder to catch some
822 // corrupted files.
823 if (ret == LZMA_STREAM_END) {
824 if (!rc_is_finished(coder->rc))
825 ret = LZMA_DATA_ERROR;
826
827 // Reset the range decoder so that it is ready to reinitialize
828 // for a new LZMA2 chunk.
829 rc_reset(coder->rc);
830 }
831
832 return ret;
833 }
834
835
836
837 static void
838 lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size)
839 {
840 coder->uncompressed_size = uncompressed_size;
841 }
842
843 /*
844 extern void
845 lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
846 {
847 // This is hack.
848 (*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size;
849 }
850 */
851
852 static void
853 lzma_decoder_reset(lzma_coder *coder, const void *opt)
854 {
855 const lzma_options_lzma *options = opt;
856
857 // NOTE: We assume that lc/lp/pb are valid since they were
858 // successfully decoded with lzma_lzma_decode_properties().
859 // FIXME?
860
861 // Calculate pos_mask. We don't need pos_bits as is for anything.
862 coder->pos_mask = (1U << options->pb) - 1;
863
864 // Initialize the literal decoder.
865 literal_init(coder->literal, options->lc, options->lp);
866
867 coder->literal_context_bits = options->lc;
868 coder->literal_pos_mask = (1U << options->lp) - 1;
869
870 // State
871 coder->state = STATE_LIT_LIT;
872 coder->rep0 = 0;
873 coder->rep1 = 0;
874 coder->rep2 = 0;
875 coder->rep3 = 0;
876 coder->pos_mask = (1U << options->pb) - 1;
877
878 // Range decoder
879 rc_reset(coder->rc);
880
881 // Bit and bittree decoders
882 for (uint32_t i = 0; i < STATES; ++i) {
883 for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
884 bit_reset(coder->is_match[i][j]);
885 bit_reset(coder->is_rep0_long[i][j]);
886 }
887
888 bit_reset(coder->is_rep[i]);
889 bit_reset(coder->is_rep0[i]);
890 bit_reset(coder->is_rep1[i]);
891 bit_reset(coder->is_rep2[i]);
892 }
893
894 for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i)
895 bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);
896
897 for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)
898 bit_reset(coder->pos_special[i]);
899
900 bittree_reset(coder->pos_align, ALIGN_BITS);
901
902 // Len decoders (also bit/bittree)
903 const uint32_t num_pos_states = 1U << options->pb;
904 bit_reset(coder->match_len_decoder.choice);
905 bit_reset(coder->match_len_decoder.choice2);
906 bit_reset(coder->rep_len_decoder.choice);
907 bit_reset(coder->rep_len_decoder.choice2);
908
909 for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
910 bittree_reset(coder->match_len_decoder.low[pos_state],
911 LEN_LOW_BITS);
912 bittree_reset(coder->match_len_decoder.mid[pos_state],
913 LEN_MID_BITS);
914
915 bittree_reset(coder->rep_len_decoder.low[pos_state],
916 LEN_LOW_BITS);
917 bittree_reset(coder->rep_len_decoder.mid[pos_state],
918 LEN_MID_BITS);
919 }
920
921 bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
922 bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
923
924 coder->sequence = SEQ_IS_MATCH;
925 coder->probs = NULL;
926 coder->symbol = 0;
927 coder->limit = 0;
928 coder->offset = 0;
929 coder->len = 0;
930
931 return;
932 }
933
934
935 extern lzma_ret
936 lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
937 const void *opt, lzma_lz_options *lz_options)
938 {
939 if (lz->coder == NULL) {
940 lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
941 if (lz->coder == NULL)
942 return LZMA_MEM_ERROR;
943
944 lz->code = &lzma_decode;
945 lz->reset = &lzma_decoder_reset;
946 lz->set_uncompressed = &lzma_decoder_uncompressed;
947 }
948
949 // All dictionary sizes are OK here. LZ decoder will take care of
950 // the special cases.
951 const lzma_options_lzma *options = opt;
952 lz_options->dict_size = options->dict_size;
953 lz_options->preset_dict = options->preset_dict;
954 lz_options->preset_dict_size = options->preset_dict_size;
955
956 return LZMA_OK;
957 }
958
959
960 /// Allocate and initialize LZMA decoder. This is used only via LZ
961 /// initialization (lzma_lzma_decoder_init() passes function pointer to
962 /// the LZ initialization).
963 static lzma_ret
964 lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
965 const void *options, lzma_lz_options *lz_options)
966 {
967 if (!is_lclppb_valid(options))
968 return LZMA_PROG_ERROR;
969
970 return_if_error(lzma_lzma_decoder_create(
971 lz, allocator, options, lz_options));
972
973 lzma_decoder_reset(lz->coder, options);
974 lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
975
976 return LZMA_OK;
977 }
978
979
980 extern lzma_ret
981 lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
982 const lzma_filter_info *filters)
983 {
984 // LZMA can only be the last filter in the chain. This is enforced
985 // by the raw_decoder initialization.
986 assert(filters[1].init == NULL);
987
988 return lzma_lz_decoder_init(next, allocator, filters,
989 &lzma_decoder_init);
990 }
991
992
993 extern bool
994 lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
995 {
996 if (byte > (4 * 5 + 4) * 9 + 8)
997 return true;
998
999 // See the file format specification to understand this.
1000 options->pb = byte / (9 * 5);
1001 byte -= options->pb * 9 * 5;
1002 options->lp = byte / 9;
1003 options->lc = byte - options->lp * 9;
1004
1005 return options->lc + options->lp > LZMA_LCLP_MAX;
1006 }
1007
1008
1009 extern uint64_t
1010 lzma_lzma_decoder_memusage_nocheck(const void *options)
1011 {
1012 const lzma_options_lzma *const opt = options;
1013 return sizeof(lzma_coder) + lzma_lz_decoder_memusage(opt->dict_size);
1014 }
1015
1016
1017 extern uint64_t
1018 lzma_lzma_decoder_memusage(const void *options)
1019 {
1020 if (!is_lclppb_valid(options))
1021 return UINT64_MAX;
1022
1023 return lzma_lzma_decoder_memusage_nocheck(options);
1024 }
1025
1026
1027 extern lzma_ret
1028 lzma_lzma_props_decode(void **options, lzma_allocator *allocator,
1029 const uint8_t *props, size_t props_size)
1030 {
1031 if (props_size != 5)
1032 return LZMA_OPTIONS_ERROR;
1033
1034 lzma_options_lzma *opt
1035 = lzma_alloc(sizeof(lzma_options_lzma), allocator);
1036 if (opt == NULL)
1037 return LZMA_MEM_ERROR;
1038
1039 if (lzma_lzma_lclppb_decode(opt, props[0]))
1040 goto error;
1041
1042 // All dictionary sizes are accepted, including zero. LZ decoder
1043 // will automatically use a dictionary at least a few KiB even if
1044 // a smaller dictionary is requested.
1045 opt->dict_size = unaligned_read32le(props + 1);
1046
1047 opt->preset_dict = NULL;
1048 opt->preset_dict_size = 0;
1049
1050 *options = opt;
1051
1052 return LZMA_OK;
1053
1054 error:
1055 lzma_free(opt, allocator);
1056 return LZMA_OPTIONS_ERROR;
1057 }
OLDNEW
« no previous file with comments | « xz/src/liblzma/lzma/lzma_decoder.h ('k') | xz/src/liblzma/lzma/lzma_encoder.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698