| Index: xz/src/liblzma/lzma/lzma_decoder.c
|
| ===================================================================
|
| --- xz/src/liblzma/lzma/lzma_decoder.c (revision 0)
|
| +++ xz/src/liblzma/lzma/lzma_decoder.c (revision 0)
|
| @@ -0,0 +1,1057 @@
|
| +///////////////////////////////////////////////////////////////////////////////
|
| +//
|
| +/// \file lzma_decoder.c
|
| +/// \brief LZMA decoder
|
| +///
|
| +// Authors: Igor Pavlov
|
| +// Lasse Collin
|
| +//
|
| +// This file has been put into the public domain.
|
| +// You can do whatever you want with this file.
|
| +//
|
| +///////////////////////////////////////////////////////////////////////////////
|
| +
|
| +#include "lz_decoder.h"
|
| +#include "lzma_common.h"
|
| +#include "lzma_decoder.h"
|
| +#include "range_decoder.h"
|
| +
|
| +
|
| +#ifdef HAVE_SMALL
|
| +
|
| +// Macros for (somewhat) size-optimized code.
|
| +#define seq_4(seq) seq
|
| +
|
| +#define seq_6(seq) seq
|
| +
|
| +#define seq_8(seq) seq
|
| +
|
| +#define seq_len(seq) \
|
| + seq ## _CHOICE, \
|
| + seq ## _CHOICE2, \
|
| + seq ## _BITTREE
|
| +
|
| +#define len_decode(target, ld, pos_state, seq) \
|
| +do { \
|
| +case seq ## _CHOICE: \
|
| + rc_if_0(ld.choice, seq ## _CHOICE) { \
|
| + rc_update_0(ld.choice); \
|
| + probs = ld.low[pos_state];\
|
| + limit = LEN_LOW_SYMBOLS; \
|
| + target = MATCH_LEN_MIN; \
|
| + } else { \
|
| + rc_update_1(ld.choice); \
|
| +case seq ## _CHOICE2: \
|
| + rc_if_0(ld.choice2, seq ## _CHOICE2) { \
|
| + rc_update_0(ld.choice2); \
|
| + probs = ld.mid[pos_state]; \
|
| + limit = LEN_MID_SYMBOLS; \
|
| + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
|
| + } else { \
|
| + rc_update_1(ld.choice2); \
|
| + probs = ld.high; \
|
| + limit = LEN_HIGH_SYMBOLS; \
|
| + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
|
| + + LEN_MID_SYMBOLS; \
|
| + } \
|
| + } \
|
| + symbol = 1; \
|
| +case seq ## _BITTREE: \
|
| + do { \
|
| + rc_bit(probs[symbol], , , seq ## _BITTREE); \
|
| + } while (symbol < limit); \
|
| + target += symbol - limit; \
|
| +} while (0)
|
| +
|
| +#else // HAVE_SMALL
|
| +
|
| +// Unrolled versions
|
| +#define seq_4(seq) \
|
| + seq ## 0, \
|
| + seq ## 1, \
|
| + seq ## 2, \
|
| + seq ## 3
|
| +
|
| +#define seq_6(seq) \
|
| + seq ## 0, \
|
| + seq ## 1, \
|
| + seq ## 2, \
|
| + seq ## 3, \
|
| + seq ## 4, \
|
| + seq ## 5
|
| +
|
| +#define seq_8(seq) \
|
| + seq ## 0, \
|
| + seq ## 1, \
|
| + seq ## 2, \
|
| + seq ## 3, \
|
| + seq ## 4, \
|
| + seq ## 5, \
|
| + seq ## 6, \
|
| + seq ## 7
|
| +
|
| +#define seq_len(seq) \
|
| + seq ## _CHOICE, \
|
| + seq ## _LOW0, \
|
| + seq ## _LOW1, \
|
| + seq ## _LOW2, \
|
| + seq ## _CHOICE2, \
|
| + seq ## _MID0, \
|
| + seq ## _MID1, \
|
| + seq ## _MID2, \
|
| + seq ## _HIGH0, \
|
| + seq ## _HIGH1, \
|
| + seq ## _HIGH2, \
|
| + seq ## _HIGH3, \
|
| + seq ## _HIGH4, \
|
| + seq ## _HIGH5, \
|
| + seq ## _HIGH6, \
|
| + seq ## _HIGH7
|
| +
|
| +#define len_decode(target, ld, pos_state, seq) \
|
| +do { \
|
| + symbol = 1; \
|
| +case seq ## _CHOICE: \
|
| + rc_if_0(ld.choice, seq ## _CHOICE) { \
|
| + rc_update_0(ld.choice); \
|
| + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
|
| + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
|
| + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
|
| + target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
|
| + } else { \
|
| + rc_update_1(ld.choice); \
|
| +case seq ## _CHOICE2: \
|
| + rc_if_0(ld.choice2, seq ## _CHOICE2) { \
|
| + rc_update_0(ld.choice2); \
|
| + rc_bit_case(ld.mid[pos_state][symbol], , , \
|
| + seq ## _MID0); \
|
| + rc_bit_case(ld.mid[pos_state][symbol], , , \
|
| + seq ## _MID1); \
|
| + rc_bit_case(ld.mid[pos_state][symbol], , , \
|
| + seq ## _MID2); \
|
| + target = symbol - LEN_MID_SYMBOLS \
|
| + + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
|
| + } else { \
|
| + rc_update_1(ld.choice2); \
|
| + rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
|
| + rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
|
| + rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
|
| + rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
|
| + rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
|
| + rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
|
| + rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
|
| + rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
|
| + target = symbol - LEN_HIGH_SYMBOLS \
|
| + + MATCH_LEN_MIN \
|
| + + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
|
| + } \
|
| + } \
|
| +} while (0)
|
| +
|
| +#endif // HAVE_SMALL
|
| +
|
| +
|
| +/// Length decoder probabilities; see comments in lzma_common.h.
|
| +typedef struct {
|
| + probability choice;
|
| + probability choice2;
|
| + probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
|
| + probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
|
| + probability high[LEN_HIGH_SYMBOLS];
|
| +} lzma_length_decoder;
|
| +
|
| +
|
| +struct lzma_coder_s {
|
| + ///////////////////
|
| + // Probabilities //
|
| + ///////////////////
|
| +
|
| + /// Literals; see comments in lzma_common.h.
|
| + probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
|
| +
|
| + /// If 1, it's a match. Otherwise it's a single 8-bit literal.
|
| + probability is_match[STATES][POS_STATES_MAX];
|
| +
|
| + /// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
|
| + probability is_rep[STATES];
|
| +
|
| + /// If 0, distance of a repeated match is rep0.
|
| + /// Otherwise check is_rep1.
|
| + probability is_rep0[STATES];
|
| +
|
| + /// If 0, distance of a repeated match is rep1.
|
| + /// Otherwise check is_rep2.
|
| + probability is_rep1[STATES];
|
| +
|
| + /// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
|
| + probability is_rep2[STATES];
|
| +
|
| + /// If 1, the repeated match has length of one byte. Otherwise
|
| + /// the length is decoded from rep_len_decoder.
|
| + probability is_rep0_long[STATES][POS_STATES_MAX];
|
| +
|
| + /// Probability tree for the highest two bits of the match distance.
|
| + /// There is a separate probability tree for match lengths of
|
| + /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
|
| + probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS];
|
| +
|
| + /// Probability trees for additional bits for match distance when the
|
| + /// distance is in the range [4, 127].
|
| + probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX];
|
| +
|
| + /// Probability tree for the lowest four bits of a match distance
|
| + /// that is equal to or greater than 128.
|
| + probability pos_align[ALIGN_TABLE_SIZE];
|
| +
|
| + /// Length of a normal match
|
| + lzma_length_decoder match_len_decoder;
|
| +
|
| + /// Length of a repeated match
|
| + lzma_length_decoder rep_len_decoder;
|
| +
|
| + ///////////////////
|
| + // Decoder state //
|
| + ///////////////////
|
| +
|
| + // Range coder
|
| + lzma_range_decoder rc;
|
| +
|
| + // Types of the most recently seen LZMA symbols
|
| + lzma_lzma_state state;
|
| +
|
| + uint32_t rep0; ///< Distance of the latest match
|
| + uint32_t rep1; ///< Distance of second latest match
|
| + uint32_t rep2; ///< Distance of third latest match
|
| + uint32_t rep3; ///< Distance of fourth latest match
|
| +
|
| + uint32_t pos_mask; // (1U << pb) - 1
|
| + uint32_t literal_context_bits;
|
| + uint32_t literal_pos_mask;
|
| +
|
| + /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
|
| + /// payload marker is expected.
|
| + lzma_vli uncompressed_size;
|
| +
|
| + ////////////////////////////////
|
| + // State of incomplete symbol //
|
| + ////////////////////////////////
|
| +
|
| + /// Position where to continue the decoder loop
|
| + enum {
|
| + SEQ_NORMALIZE,
|
| + SEQ_IS_MATCH,
|
| + seq_8(SEQ_LITERAL),
|
| + seq_8(SEQ_LITERAL_MATCHED),
|
| + SEQ_LITERAL_WRITE,
|
| + SEQ_IS_REP,
|
| + seq_len(SEQ_MATCH_LEN),
|
| + seq_6(SEQ_POS_SLOT),
|
| + SEQ_POS_MODEL,
|
| + SEQ_DIRECT,
|
| + seq_4(SEQ_ALIGN),
|
| + SEQ_EOPM,
|
| + SEQ_IS_REP0,
|
| + SEQ_SHORTREP,
|
| + SEQ_IS_REP0_LONG,
|
| + SEQ_IS_REP1,
|
| + SEQ_IS_REP2,
|
| + seq_len(SEQ_REP_LEN),
|
| + SEQ_COPY,
|
| + } sequence;
|
| +
|
| + /// Base of the current probability tree
|
| + probability *probs;
|
| +
|
| + /// Symbol being decoded. This is also used as an index variable in
|
| + /// bittree decoders: probs[symbol]
|
| + uint32_t symbol;
|
| +
|
| + /// Used as a loop termination condition on bittree decoders and
|
| + /// direct bits decoder.
|
| + uint32_t limit;
|
| +
|
| + /// Matched literal decoder: 0x100 or 0 to help avoiding branches.
|
| + /// Bittree reverse decoders: Offset of the next bit: 1 << offset
|
| + uint32_t offset;
|
| +
|
| + /// If decoding a literal: match byte.
|
| + /// If decoding a match: length of the match.
|
| + uint32_t len;
|
| +};
|
| +
|
| +
|
| +static lzma_ret
|
| +lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,
|
| + const uint8_t *restrict in,
|
| + size_t *restrict in_pos, size_t in_size)
|
| +{
|
| + ////////////////////
|
| + // Initialization //
|
| + ////////////////////
|
| +
|
| + if (!rc_read_init(&coder->rc, in, in_pos, in_size))
|
| + return LZMA_OK;
|
| +
|
| + ///////////////
|
| + // Variables //
|
| + ///////////////
|
| +
|
| + // Making local copies of often-used variables improves both
|
| + // speed and readability.
|
| +
|
| + lzma_dict dict = *dictptr;
|
| +
|
| + const size_t dict_start = dict.pos;
|
| +
|
| + // Range decoder
|
| + rc_to_local(coder->rc, *in_pos);
|
| +
|
| + // State
|
| + uint32_t state = coder->state;
|
| + uint32_t rep0 = coder->rep0;
|
| + uint32_t rep1 = coder->rep1;
|
| + uint32_t rep2 = coder->rep2;
|
| + uint32_t rep3 = coder->rep3;
|
| +
|
| + const uint32_t pos_mask = coder->pos_mask;
|
| +
|
| + // These variables are actually needed only if we last time ran
|
| + // out of input in the middle of the decoder loop.
|
| + probability *probs = coder->probs;
|
| + uint32_t symbol = coder->symbol;
|
| + uint32_t limit = coder->limit;
|
| + uint32_t offset = coder->offset;
|
| + uint32_t len = coder->len;
|
| +
|
| + const uint32_t literal_pos_mask = coder->literal_pos_mask;
|
| + const uint32_t literal_context_bits = coder->literal_context_bits;
|
| +
|
| + // Temporary variables
|
| + uint32_t pos_state = dict.pos & pos_mask;
|
| +
|
| + lzma_ret ret = LZMA_OK;
|
| +
|
| + // If uncompressed size is known, there must be no end of payload
|
| + // marker.
|
| + const bool no_eopm = coder->uncompressed_size
|
| + != LZMA_VLI_UNKNOWN;
|
| + if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
|
| + dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
|
| +
|
| + // The main decoder loop. The "switch" is used to restart the decoder at
|
| + // correct location. Once restarted, the "switch" is no longer used.
|
| + switch (coder->sequence)
|
| + while (true) {
|
| + // Calculate new pos_state. This is skipped on the first loop
|
| + // since we already calculated it when setting up the local
|
| + // variables.
|
| + pos_state = dict.pos & pos_mask;
|
| +
|
| + case SEQ_NORMALIZE:
|
| + case SEQ_IS_MATCH:
|
| + if (unlikely(no_eopm && dict.pos == dict.limit))
|
| + break;
|
| +
|
| + rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
|
| + rc_update_0(coder->is_match[state][pos_state]);
|
| +
|
| + // It's a literal i.e. a single 8-bit byte.
|
| +
|
| + probs = literal_subcoder(coder->literal,
|
| + literal_context_bits, literal_pos_mask,
|
| + dict.pos, dict_get(&dict, 0));
|
| + symbol = 1;
|
| +
|
| + if (is_literal_state(state)) {
|
| + // Decode literal without match byte.
|
| +#ifdef HAVE_SMALL
|
| + case SEQ_LITERAL:
|
| + do {
|
| + rc_bit(probs[symbol], , , SEQ_LITERAL);
|
| + } while (symbol < (1 << 8));
|
| +#else
|
| + rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
|
| + rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
|
| + rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
|
| + rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
|
| + rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
|
| + rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
|
| + rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
|
| + rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
|
| +#endif
|
| + } else {
|
| + // Decode literal with match byte.
|
| + //
|
| + // We store the byte we compare against
|
| + // ("match byte") to "len" to minimize the
|
| + // number of variables we need to store
|
| + // between decoder calls.
|
| + len = dict_get(&dict, rep0) << 1;
|
| +
|
| + // The usage of "offset" allows omitting some
|
| + // branches, which should give tiny speed
|
| + // improvement on some CPUs. "offset" gets
|
| + // set to zero if match_bit didn't match.
|
| + offset = 0x100;
|
| +
|
| +#ifdef HAVE_SMALL
|
| + case SEQ_LITERAL_MATCHED:
|
| + do {
|
| + const uint32_t match_bit
|
| + = len & offset;
|
| + const uint32_t subcoder_index
|
| + = offset + match_bit
|
| + + symbol;
|
| +
|
| + rc_bit(probs[subcoder_index],
|
| + offset &= ~match_bit,
|
| + offset &= match_bit,
|
| + SEQ_LITERAL_MATCHED);
|
| +
|
| + // It seems to be faster to do this
|
| + // here instead of putting it to the
|
| + // beginning of the loop and then
|
| + // putting the "case" in the middle
|
| + // of the loop.
|
| + len <<= 1;
|
| +
|
| + } while (symbol < (1 << 8));
|
| +#else
|
| + // Unroll the loop.
|
| + uint32_t match_bit;
|
| + uint32_t subcoder_index;
|
| +
|
| +# define d(seq) \
|
| + case seq: \
|
| + match_bit = len & offset; \
|
| + subcoder_index = offset + match_bit + symbol; \
|
| + rc_bit(probs[subcoder_index], \
|
| + offset &= ~match_bit, \
|
| + offset &= match_bit, \
|
| + seq)
|
| +
|
| + d(SEQ_LITERAL_MATCHED0);
|
| + len <<= 1;
|
| + d(SEQ_LITERAL_MATCHED1);
|
| + len <<= 1;
|
| + d(SEQ_LITERAL_MATCHED2);
|
| + len <<= 1;
|
| + d(SEQ_LITERAL_MATCHED3);
|
| + len <<= 1;
|
| + d(SEQ_LITERAL_MATCHED4);
|
| + len <<= 1;
|
| + d(SEQ_LITERAL_MATCHED5);
|
| + len <<= 1;
|
| + d(SEQ_LITERAL_MATCHED6);
|
| + len <<= 1;
|
| + d(SEQ_LITERAL_MATCHED7);
|
| +# undef d
|
| +#endif
|
| + }
|
| +
|
| + //update_literal(state);
|
| + // Use a lookup table to update to literal state,
|
| + // since compared to other state updates, this would
|
| + // need two branches.
|
| + static const lzma_lzma_state next_state[] = {
|
| + STATE_LIT_LIT,
|
| + STATE_LIT_LIT,
|
| + STATE_LIT_LIT,
|
| + STATE_LIT_LIT,
|
| + STATE_MATCH_LIT_LIT,
|
| + STATE_REP_LIT_LIT,
|
| + STATE_SHORTREP_LIT_LIT,
|
| + STATE_MATCH_LIT,
|
| + STATE_REP_LIT,
|
| + STATE_SHORTREP_LIT,
|
| + STATE_MATCH_LIT,
|
| + STATE_REP_LIT
|
| + };
|
| + state = next_state[state];
|
| +
|
| + case SEQ_LITERAL_WRITE:
|
| + if (unlikely(dict_put(&dict, symbol))) {
|
| + coder->sequence = SEQ_LITERAL_WRITE;
|
| + goto out;
|
| + }
|
| +
|
| + continue;
|
| + }
|
| +
|
| + // Instead of a new byte we are going to get a byte range
|
| + // (distance and length) which will be repeated from our
|
| + // output history.
|
| +
|
| + rc_update_1(coder->is_match[state][pos_state]);
|
| +
|
| + case SEQ_IS_REP:
|
| + rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
|
| + // Not a repeated match
|
| + rc_update_0(coder->is_rep[state]);
|
| + update_match(state);
|
| +
|
| + // The latest three match distances are kept in
|
| + // memory in case there are repeated matches.
|
| + rep3 = rep2;
|
| + rep2 = rep1;
|
| + rep1 = rep0;
|
| +
|
| + // Decode the length of the match.
|
| + len_decode(len, coder->match_len_decoder,
|
| + pos_state, SEQ_MATCH_LEN);
|
| +
|
| + // Prepare to decode the highest two bits of the
|
| + // match distance.
|
| + probs = coder->pos_slot[get_len_to_pos_state(len)];
|
| + symbol = 1;
|
| +
|
| +#ifdef HAVE_SMALL
|
| + case SEQ_POS_SLOT:
|
| + do {
|
| + rc_bit(probs[symbol], , , SEQ_POS_SLOT);
|
| + } while (symbol < POS_SLOTS);
|
| +#else
|
| + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0);
|
| + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1);
|
| + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2);
|
| + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3);
|
| + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4);
|
| + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5);
|
| +#endif
|
| + // Get rid of the highest bit that was needed for
|
| + // indexing of the probability array.
|
| + symbol -= POS_SLOTS;
|
| + assert(symbol <= 63);
|
| +
|
| + if (symbol < START_POS_MODEL_INDEX) {
|
| + // Match distances [0, 3] have only two bits.
|
| + rep0 = symbol;
|
| + } else {
|
| + // Decode the lowest [1, 29] bits of
|
| + // the match distance.
|
| + limit = (symbol >> 1) - 1;
|
| + assert(limit >= 1 && limit <= 30);
|
| + rep0 = 2 + (symbol & 1);
|
| +
|
| + if (symbol < END_POS_MODEL_INDEX) {
|
| + // Prepare to decode the low bits for
|
| + // a distance of [4, 127].
|
| + assert(limit <= 5);
|
| + rep0 <<= limit;
|
| + assert(rep0 <= 96);
|
| + // -1 is fine, because we start
|
| + // decoding at probs[1], not probs[0].
|
| + // NOTE: This violates the C standard,
|
| + // since we are doing pointer
|
| + // arithmetic past the beginning of
|
| + // the array.
|
| + assert((int32_t)(rep0 - symbol - 1)
|
| + >= -1);
|
| + assert((int32_t)(rep0 - symbol - 1)
|
| + <= 82);
|
| + probs = coder->pos_special + rep0
|
| + - symbol - 1;
|
| + symbol = 1;
|
| + offset = 0;
|
| + case SEQ_POS_MODEL:
|
| +#ifdef HAVE_SMALL
|
| + do {
|
| + rc_bit(probs[symbol], ,
|
| + rep0 += 1 << offset,
|
| + SEQ_POS_MODEL);
|
| + } while (++offset < limit);
|
| +#else
|
| + switch (limit) {
|
| + case 5:
|
| + assert(offset == 0);
|
| + rc_bit(probs[symbol], ,
|
| + rep0 += 1,
|
| + SEQ_POS_MODEL);
|
| + ++offset;
|
| + --limit;
|
| + case 4:
|
| + rc_bit(probs[symbol], ,
|
| + rep0 += 1 << offset,
|
| + SEQ_POS_MODEL);
|
| + ++offset;
|
| + --limit;
|
| + case 3:
|
| + rc_bit(probs[symbol], ,
|
| + rep0 += 1 << offset,
|
| + SEQ_POS_MODEL);
|
| + ++offset;
|
| + --limit;
|
| + case 2:
|
| + rc_bit(probs[symbol], ,
|
| + rep0 += 1 << offset,
|
| + SEQ_POS_MODEL);
|
| + ++offset;
|
| + --limit;
|
| + case 1:
|
| + // We need "symbol" only for
|
| + // indexing the probability
|
| + // array, thus we can use
|
| + // rc_bit_last() here to omit
|
| + // the unneeded updating of
|
| + // "symbol".
|
| + rc_bit_last(probs[symbol], ,
|
| + rep0 += 1 << offset,
|
| + SEQ_POS_MODEL);
|
| + }
|
| +#endif
|
| + } else {
|
| + // The distance is >= 128. Decode the
|
| + // lower bits without probabilities
|
| + // except the lowest four bits.
|
| + assert(symbol >= 14);
|
| + assert(limit >= 6);
|
| + limit -= ALIGN_BITS;
|
| + assert(limit >= 2);
|
| + case SEQ_DIRECT:
|
| + // Not worth manual unrolling
|
| + do {
|
| + rc_direct(rep0, SEQ_DIRECT);
|
| + } while (--limit > 0);
|
| +
|
| + // Decode the lowest four bits using
|
| + // probabilities.
|
| + rep0 <<= ALIGN_BITS;
|
| + symbol = 1;
|
| +#ifdef HAVE_SMALL
|
| + offset = 0;
|
| + case SEQ_ALIGN:
|
| + do {
|
| + rc_bit(coder->pos_align[
|
| + symbol], ,
|
| + rep0 += 1 << offset,
|
| + SEQ_ALIGN);
|
| + } while (++offset < ALIGN_BITS);
|
| +#else
|
| + case SEQ_ALIGN0:
|
| + rc_bit(coder->pos_align[symbol], ,
|
| + rep0 += 1, SEQ_ALIGN0);
|
| + case SEQ_ALIGN1:
|
| + rc_bit(coder->pos_align[symbol], ,
|
| + rep0 += 2, SEQ_ALIGN1);
|
| + case SEQ_ALIGN2:
|
| + rc_bit(coder->pos_align[symbol], ,
|
| + rep0 += 4, SEQ_ALIGN2);
|
| + case SEQ_ALIGN3:
|
| + // Like in SEQ_POS_MODEL, we don't
|
| + // need "symbol" for anything else
|
| + // than indexing the probability array.
|
| + rc_bit_last(coder->pos_align[symbol], ,
|
| + rep0 += 8, SEQ_ALIGN3);
|
| +#endif
|
| +
|
| + if (rep0 == UINT32_MAX) {
|
| + // End of payload marker was
|
| + // found. It must not be
|
| + // present if uncompressed
|
| + // size is known.
|
| + if (coder->uncompressed_size
|
| + != LZMA_VLI_UNKNOWN) {
|
| + ret = LZMA_DATA_ERROR;
|
| + goto out;
|
| + }
|
| +
|
| + case SEQ_EOPM:
|
| + // TODO Comment
|
| + rc_normalize(SEQ_EOPM);
|
| + ret = LZMA_STREAM_END;
|
| + goto out;
|
| + }
|
| + }
|
| + }
|
| +
|
| + // Validate the distance we just decoded.
|
| + if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
|
| + ret = LZMA_DATA_ERROR;
|
| + goto out;
|
| + }
|
| +
|
| + } else {
|
| + rc_update_1(coder->is_rep[state]);
|
| +
|
| + // Repeated match
|
| + //
|
| + // The match distance is a value that we have had
|
| + // earlier. The latest four match distances are
|
| + // available as rep0, rep1, rep2 and rep3. We will
|
| + // now decode which of them is the new distance.
|
| + //
|
| + // There cannot be a match if we haven't produced
|
| + // any output, so check that first.
|
| + if (unlikely(!dict_is_distance_valid(&dict, 0))) {
|
| + ret = LZMA_DATA_ERROR;
|
| + goto out;
|
| + }
|
| +
|
| + case SEQ_IS_REP0:
|
| + rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
|
| + rc_update_0(coder->is_rep0[state]);
|
| + // The distance is rep0.
|
| +
|
| + case SEQ_IS_REP0_LONG:
|
| + rc_if_0(coder->is_rep0_long[state][pos_state],
|
| + SEQ_IS_REP0_LONG) {
|
| + rc_update_0(coder->is_rep0_long[
|
| + state][pos_state]);
|
| +
|
| + update_short_rep(state);
|
| +
|
| + case SEQ_SHORTREP:
|
| + if (unlikely(dict_put(&dict, dict_get(
|
| + &dict, rep0)))) {
|
| + coder->sequence = SEQ_SHORTREP;
|
| + goto out;
|
| + }
|
| +
|
| + continue;
|
| + }
|
| +
|
| + // Repeating more than one byte at
|
| + // distance of rep0.
|
| + rc_update_1(coder->is_rep0_long[
|
| + state][pos_state]);
|
| +
|
| + } else {
|
| + rc_update_1(coder->is_rep0[state]);
|
| +
|
| + case SEQ_IS_REP1:
|
| + // The distance is rep1, rep2 or rep3. Once
|
| + // we find out which one of these three, it
|
| + // is stored to rep0 and rep1, rep2 and rep3
|
| + // are updated accordingly.
|
| + rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
|
| + rc_update_0(coder->is_rep1[state]);
|
| +
|
| + const uint32_t distance = rep1;
|
| + rep1 = rep0;
|
| + rep0 = distance;
|
| +
|
| + } else {
|
| + rc_update_1(coder->is_rep1[state]);
|
| + case SEQ_IS_REP2:
|
| + rc_if_0(coder->is_rep2[state],
|
| + SEQ_IS_REP2) {
|
| + rc_update_0(coder->is_rep2[
|
| + state]);
|
| +
|
| + const uint32_t distance = rep2;
|
| + rep2 = rep1;
|
| + rep1 = rep0;
|
| + rep0 = distance;
|
| +
|
| + } else {
|
| + rc_update_1(coder->is_rep2[
|
| + state]);
|
| +
|
| + const uint32_t distance = rep3;
|
| + rep3 = rep2;
|
| + rep2 = rep1;
|
| + rep1 = rep0;
|
| + rep0 = distance;
|
| + }
|
| + }
|
| + }
|
| +
|
| + update_long_rep(state);
|
| +
|
| + // Decode the length of the repeated match.
|
| + len_decode(len, coder->rep_len_decoder,
|
| + pos_state, SEQ_REP_LEN);
|
| + }
|
| +
|
| + /////////////////////////////////
|
| + // Repeat from history buffer. //
|
| + /////////////////////////////////
|
| +
|
| + // The length is always between these limits. There is no way
|
| + // to trigger the algorithm to set len outside this range.
|
| + assert(len >= MATCH_LEN_MIN);
|
| + assert(len <= MATCH_LEN_MAX);
|
| +
|
| + case SEQ_COPY:
|
| + // Repeat len bytes from distance of rep0.
|
| + if (unlikely(dict_repeat(&dict, rep0, &len))) {
|
| + coder->sequence = SEQ_COPY;
|
| + goto out;
|
| + }
|
| + }
|
| +
|
| + rc_normalize(SEQ_NORMALIZE);
|
| + coder->sequence = SEQ_IS_MATCH;
|
| +
|
| +out:
|
| + // Save state
|
| +
|
| + // NOTE: Must not copy dict.limit.
|
| + dictptr->pos = dict.pos;
|
| + dictptr->full = dict.full;
|
| +
|
| + rc_from_local(coder->rc, *in_pos);
|
| +
|
| + coder->state = state;
|
| + coder->rep0 = rep0;
|
| + coder->rep1 = rep1;
|
| + coder->rep2 = rep2;
|
| + coder->rep3 = rep3;
|
| +
|
| + coder->probs = probs;
|
| + coder->symbol = symbol;
|
| + coder->limit = limit;
|
| + coder->offset = offset;
|
| + coder->len = len;
|
| +
|
| + // Update the remaining amount of uncompressed data if uncompressed
|
| + // size was known.
|
| + if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
|
| + coder->uncompressed_size -= dict.pos - dict_start;
|
| +
|
| + // Since there cannot be end of payload marker if the
|
| + // uncompressed size was known, we check here if we
|
| + // finished decoding.
|
| + if (coder->uncompressed_size == 0 && ret == LZMA_OK
|
| + && coder->sequence != SEQ_NORMALIZE)
|
| + ret = coder->sequence == SEQ_IS_MATCH
|
| + ? LZMA_STREAM_END : LZMA_DATA_ERROR;
|
| + }
|
| +
|
| + // We can do an additional check in the range decoder to catch some
|
| + // corrupted files.
|
| + if (ret == LZMA_STREAM_END) {
|
| + if (!rc_is_finished(coder->rc))
|
| + ret = LZMA_DATA_ERROR;
|
| +
|
| + // Reset the range decoder so that it is ready to reinitialize
|
| + // for a new LZMA2 chunk.
|
| + rc_reset(coder->rc);
|
| + }
|
| +
|
| + return ret;
|
| +}
|
| +
|
| +
|
| +
|
| +static void
|
| +lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size)
|
| +{
|
| + coder->uncompressed_size = uncompressed_size;
|
| +}
|
| +
|
| +/*
|
| +extern void
|
| +lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
|
| +{
|
| + // This is hack.
|
| + (*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size;
|
| +}
|
| +*/
|
| +
|
| +static void
|
| +lzma_decoder_reset(lzma_coder *coder, const void *opt)
|
| +{
|
| + const lzma_options_lzma *options = opt;
|
| +
|
| + // NOTE: We assume that lc/lp/pb are valid since they were
|
| + // successfully decoded with lzma_lzma_decode_properties().
|
| + // FIXME?
|
| +
|
| + // Calculate pos_mask. We don't need pos_bits as is for anything.
|
| + coder->pos_mask = (1U << options->pb) - 1;
|
| +
|
| + // Initialize the literal decoder.
|
| + literal_init(coder->literal, options->lc, options->lp);
|
| +
|
| + coder->literal_context_bits = options->lc;
|
| + coder->literal_pos_mask = (1U << options->lp) - 1;
|
| +
|
| + // State
|
| + coder->state = STATE_LIT_LIT;
|
| + coder->rep0 = 0;
|
| + coder->rep1 = 0;
|
| + coder->rep2 = 0;
|
| + coder->rep3 = 0;
|
| + coder->pos_mask = (1U << options->pb) - 1;
|
| +
|
| + // Range decoder
|
| + rc_reset(coder->rc);
|
| +
|
| + // Bit and bittree decoders
|
| + for (uint32_t i = 0; i < STATES; ++i) {
|
| + for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
|
| + bit_reset(coder->is_match[i][j]);
|
| + bit_reset(coder->is_rep0_long[i][j]);
|
| + }
|
| +
|
| + bit_reset(coder->is_rep[i]);
|
| + bit_reset(coder->is_rep0[i]);
|
| + bit_reset(coder->is_rep1[i]);
|
| + bit_reset(coder->is_rep2[i]);
|
| + }
|
| +
|
| + for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i)
|
| + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);
|
| +
|
| + for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)
|
| + bit_reset(coder->pos_special[i]);
|
| +
|
| + bittree_reset(coder->pos_align, ALIGN_BITS);
|
| +
|
| + // Len decoders (also bit/bittree)
|
| + const uint32_t num_pos_states = 1U << options->pb;
|
| + bit_reset(coder->match_len_decoder.choice);
|
| + bit_reset(coder->match_len_decoder.choice2);
|
| + bit_reset(coder->rep_len_decoder.choice);
|
| + bit_reset(coder->rep_len_decoder.choice2);
|
| +
|
| + for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
|
| + bittree_reset(coder->match_len_decoder.low[pos_state],
|
| + LEN_LOW_BITS);
|
| + bittree_reset(coder->match_len_decoder.mid[pos_state],
|
| + LEN_MID_BITS);
|
| +
|
| + bittree_reset(coder->rep_len_decoder.low[pos_state],
|
| + LEN_LOW_BITS);
|
| + bittree_reset(coder->rep_len_decoder.mid[pos_state],
|
| + LEN_MID_BITS);
|
| + }
|
| +
|
| + bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
|
| + bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
|
| +
|
| + coder->sequence = SEQ_IS_MATCH;
|
| + coder->probs = NULL;
|
| + coder->symbol = 0;
|
| + coder->limit = 0;
|
| + coder->offset = 0;
|
| + coder->len = 0;
|
| +
|
| + return;
|
| +}
|
| +
|
| +
|
| +extern lzma_ret
|
| +lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
| + const void *opt, lzma_lz_options *lz_options)
|
| +{
|
| + if (lz->coder == NULL) {
|
| + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
|
| + if (lz->coder == NULL)
|
| + return LZMA_MEM_ERROR;
|
| +
|
| + lz->code = &lzma_decode;
|
| + lz->reset = &lzma_decoder_reset;
|
| + lz->set_uncompressed = &lzma_decoder_uncompressed;
|
| + }
|
| +
|
| + // All dictionary sizes are OK here. LZ decoder will take care of
|
| + // the special cases.
|
| + const lzma_options_lzma *options = opt;
|
| + lz_options->dict_size = options->dict_size;
|
| + lz_options->preset_dict = options->preset_dict;
|
| + lz_options->preset_dict_size = options->preset_dict_size;
|
| +
|
| + return LZMA_OK;
|
| +}
|
| +
|
| +
|
| +/// Allocate and initialize LZMA decoder. This is used only via LZ
|
| +/// initialization (lzma_lzma_decoder_init() passes function pointer to
|
| +/// the LZ initialization).
|
| +static lzma_ret
|
| +lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
| + const void *options, lzma_lz_options *lz_options)
|
| +{
|
| + if (!is_lclppb_valid(options))
|
| + return LZMA_PROG_ERROR;
|
| +
|
| + return_if_error(lzma_lzma_decoder_create(
|
| + lz, allocator, options, lz_options));
|
| +
|
| + lzma_decoder_reset(lz->coder, options);
|
| + lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
|
| +
|
| + return LZMA_OK;
|
| +}
|
| +
|
| +
|
| +extern lzma_ret
|
| +lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
| + const lzma_filter_info *filters)
|
| +{
|
| + // LZMA can only be the last filter in the chain. This is enforced
|
| + // by the raw_decoder initialization.
|
| + assert(filters[1].init == NULL);
|
| +
|
| + return lzma_lz_decoder_init(next, allocator, filters,
|
| + &lzma_decoder_init);
|
| +}
|
| +
|
| +
|
| +extern bool
|
| +lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
|
| +{
|
| + if (byte > (4 * 5 + 4) * 9 + 8)
|
| + return true;
|
| +
|
| + // See the file format specification to understand this.
|
| + options->pb = byte / (9 * 5);
|
| + byte -= options->pb * 9 * 5;
|
| + options->lp = byte / 9;
|
| + options->lc = byte - options->lp * 9;
|
| +
|
| + return options->lc + options->lp > LZMA_LCLP_MAX;
|
| +}
|
| +
|
| +
|
| +extern uint64_t
|
| +lzma_lzma_decoder_memusage_nocheck(const void *options)
|
| +{
|
| + const lzma_options_lzma *const opt = options;
|
| + return sizeof(lzma_coder) + lzma_lz_decoder_memusage(opt->dict_size);
|
| +}
|
| +
|
| +
|
| +extern uint64_t
|
| +lzma_lzma_decoder_memusage(const void *options)
|
| +{
|
| + if (!is_lclppb_valid(options))
|
| + return UINT64_MAX;
|
| +
|
| + return lzma_lzma_decoder_memusage_nocheck(options);
|
| +}
|
| +
|
| +
|
| +extern lzma_ret
|
| +lzma_lzma_props_decode(void **options, lzma_allocator *allocator,
|
| + const uint8_t *props, size_t props_size)
|
| +{
|
| + if (props_size != 5)
|
| + return LZMA_OPTIONS_ERROR;
|
| +
|
| + lzma_options_lzma *opt
|
| + = lzma_alloc(sizeof(lzma_options_lzma), allocator);
|
| + if (opt == NULL)
|
| + return LZMA_MEM_ERROR;
|
| +
|
| + if (lzma_lzma_lclppb_decode(opt, props[0]))
|
| + goto error;
|
| +
|
| + // All dictionary sizes are accepted, including zero. LZ decoder
|
| + // will automatically use a dictionary at least a few KiB even if
|
| + // a smaller dictionary is requested.
|
| + opt->dict_size = unaligned_read32le(props + 1);
|
| +
|
| + opt->preset_dict = NULL;
|
| + opt->preset_dict_size = 0;
|
| +
|
| + *options = opt;
|
| +
|
| + return LZMA_OK;
|
| +
|
| +error:
|
| + lzma_free(opt, allocator);
|
| + return LZMA_OPTIONS_ERROR;
|
| +}
|
|
|
| Property changes on: xz/src/liblzma/lzma/lzma_decoder.c
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|