xz/src/liblzma/lzma/lzma_decoder.c - Issue 2869016: Add an unpatched version of xz, XZ Utils, to /trunk/deps/third_party

Unified Diff: xz/src/liblzma/lzma/lzma_decoder.c

Issue 2869016: Add an unpatched version of xz, XZ Utils, to /trunk/deps/third_party (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/

Patch Set: Created 10 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: xz/src/liblzma/lzma/lzma_decoder.c

===================================================================

--- xz/src/liblzma/lzma/lzma_decoder.c (revision 0)

+++ xz/src/liblzma/lzma/lzma_decoder.c (revision 0)

@@ -0,0 +1,1057 @@

+///////////////////////////////////////////////////////////////////////////////

+//

+/// \file lzma_decoder.c

+/// \brief LZMA decoder

+///

+// Authors: Igor Pavlov

+// Lasse Collin

+//

+// This file has been put into the public domain.

+// You can do whatever you want with this file.

+//

+///////////////////////////////////////////////////////////////////////////////

+#include "lz_decoder.h"

+#include "lzma_common.h"

+#include "lzma_decoder.h"

+#include "range_decoder.h"

+#ifdef HAVE_SMALL

+// Macros for (somewhat) size-optimized code.

+#define seq_4(seq) seq

+#define seq_6(seq) seq

+#define seq_8(seq) seq

+#define seq_len(seq) \

+ seq ## _CHOICE, \

+ seq ## _CHOICE2, \

+ seq ## _BITTREE

+#define len_decode(target, ld, pos_state, seq) \

+do { \

+case seq ## _CHOICE: \

+ rc_if_0(ld.choice, seq ## _CHOICE) { \

+ rc_update_0(ld.choice); \

+ probs = ld.low[pos_state];\

+ limit = LEN_LOW_SYMBOLS; \

+ target = MATCH_LEN_MIN; \

+ } else { \

+ rc_update_1(ld.choice); \

+case seq ## _CHOICE2: \

+ rc_if_0(ld.choice2, seq ## _CHOICE2) { \

+ rc_update_0(ld.choice2); \

+ probs = ld.mid[pos_state]; \

+ limit = LEN_MID_SYMBOLS; \

+ target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \

+ } else { \

+ rc_update_1(ld.choice2); \

+ probs = ld.high; \

+ limit = LEN_HIGH_SYMBOLS; \

+ target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \

+ + LEN_MID_SYMBOLS; \

+ } \

+ symbol = 1; \

+case seq ## _BITTREE: \

+ do { \

+ rc_bit(probs[symbol], , , seq ## _BITTREE); \

+ } while (symbol < limit); \

+ target += symbol - limit; \

+} while (0)

+#else // HAVE_SMALL

+// Unrolled versions

+#define seq_4(seq) \

+ seq ## 0, \

+ seq ## 1, \

+ seq ## 2, \

+ seq ## 3

+#define seq_6(seq) \

+ seq ## 0, \

+ seq ## 1, \

+ seq ## 2, \

+ seq ## 3, \

+ seq ## 4, \

+ seq ## 5

+#define seq_8(seq) \

+ seq ## 0, \

+ seq ## 1, \

+ seq ## 2, \

+ seq ## 3, \

+ seq ## 4, \

+ seq ## 5, \

+ seq ## 6, \

+ seq ## 7

+#define seq_len(seq) \

+ seq ## _CHOICE, \

+ seq ## _LOW0, \

+ seq ## _LOW1, \

+ seq ## _LOW2, \

+ seq ## _CHOICE2, \

+ seq ## _MID0, \

+ seq ## _MID1, \

+ seq ## _MID2, \

+ seq ## _HIGH0, \

+ seq ## _HIGH1, \

+ seq ## _HIGH2, \

+ seq ## _HIGH3, \

+ seq ## _HIGH4, \

+ seq ## _HIGH5, \

+ seq ## _HIGH6, \

+ seq ## _HIGH7

+#define len_decode(target, ld, pos_state, seq) \

+do { \

+ symbol = 1; \

+case seq ## _CHOICE: \

+ rc_if_0(ld.choice, seq ## _CHOICE) { \

+ rc_update_0(ld.choice); \

+ rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \

+ rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \

+ rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \

+ target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \

+ } else { \

+ rc_update_1(ld.choice); \

+case seq ## _CHOICE2: \

+ rc_if_0(ld.choice2, seq ## _CHOICE2) { \

+ rc_update_0(ld.choice2); \

+ rc_bit_case(ld.mid[pos_state][symbol], , , \

+ seq ## _MID0); \

+ rc_bit_case(ld.mid[pos_state][symbol], , , \

+ seq ## _MID1); \

+ rc_bit_case(ld.mid[pos_state][symbol], , , \

+ seq ## _MID2); \

+ target = symbol - LEN_MID_SYMBOLS \

+ + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \

+ } else { \

+ rc_update_1(ld.choice2); \

+ rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \

+ rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \

+ rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \

+ rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \

+ rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \

+ rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \

+ rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \

+ rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \

+ target = symbol - LEN_HIGH_SYMBOLS \

+ + MATCH_LEN_MIN \

+ + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \

+ } \

+} while (0)

+#endif // HAVE_SMALL

+/// Length decoder probabilities; see comments in lzma_common.h.

+typedef struct {

+ probability choice;

+ probability choice2;

+ probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];

+ probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];

+ probability high[LEN_HIGH_SYMBOLS];

+} lzma_length_decoder;

+struct lzma_coder_s {

+ ///////////////////

+ // Probabilities //

+ ///////////////////

+ /// Literals; see comments in lzma_common.h.

+ probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];

+ /// If 1, it's a match. Otherwise it's a single 8-bit literal.

+ probability is_match[STATES][POS_STATES_MAX];

+ /// If 1, it's a repeated match. The distance is one of rep0 .. rep3.

+ probability is_rep[STATES];

+ /// If 0, distance of a repeated match is rep0.

+ /// Otherwise check is_rep1.

+ probability is_rep0[STATES];

+ /// If 0, distance of a repeated match is rep1.

+ /// Otherwise check is_rep2.

+ probability is_rep1[STATES];

+ /// If 0, distance of a repeated match is rep2. Otherwise it is rep3.

+ probability is_rep2[STATES];

+ /// If 1, the repeated match has length of one byte. Otherwise

+ /// the length is decoded from rep_len_decoder.

+ probability is_rep0_long[STATES][POS_STATES_MAX];

+ /// Probability tree for the highest two bits of the match distance.

+ /// There is a separate probability tree for match lengths of

+ /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].

+ probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS];

+ /// Probability trees for additional bits for match distance when the

+ /// distance is in the range [4, 127].

+ probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX];

+ /// Probability tree for the lowest four bits of a match distance

+ /// that is equal to or greater than 128.

+ probability pos_align[ALIGN_TABLE_SIZE];

+ /// Length of a normal match

+ lzma_length_decoder match_len_decoder;

+ /// Length of a repeated match

+ lzma_length_decoder rep_len_decoder;

+ ///////////////////

+ // Decoder state //

+ ///////////////////

+ // Range coder

+ lzma_range_decoder rc;

+ // Types of the most recently seen LZMA symbols

+ lzma_lzma_state state;

+ uint32_t rep0; ///< Distance of the latest match

+ uint32_t rep1; ///< Distance of second latest match

+ uint32_t rep2; ///< Distance of third latest match

+ uint32_t rep3; ///< Distance of fourth latest match

+ uint32_t pos_mask; // (1U << pb) - 1

+ uint32_t literal_context_bits;

+ uint32_t literal_pos_mask;

+ /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of

+ /// payload marker is expected.

+ lzma_vli uncompressed_size;

+ ////////////////////////////////

+ // State of incomplete symbol //

+ ////////////////////////////////

+ /// Position where to continue the decoder loop

+ enum {

+ SEQ_NORMALIZE,

+ SEQ_IS_MATCH,

+ seq_8(SEQ_LITERAL),

+ seq_8(SEQ_LITERAL_MATCHED),

+ SEQ_LITERAL_WRITE,

+ SEQ_IS_REP,

+ seq_len(SEQ_MATCH_LEN),

+ seq_6(SEQ_POS_SLOT),

+ SEQ_POS_MODEL,

+ SEQ_DIRECT,

+ seq_4(SEQ_ALIGN),

+ SEQ_EOPM,

+ SEQ_IS_REP0,

+ SEQ_SHORTREP,

+ SEQ_IS_REP0_LONG,

+ SEQ_IS_REP1,

+ SEQ_IS_REP2,

+ seq_len(SEQ_REP_LEN),

+ SEQ_COPY,

+ } sequence;

+ /// Base of the current probability tree

+ probability *probs;

+ /// Symbol being decoded. This is also used as an index variable in

+ /// bittree decoders: probs[symbol]

+ uint32_t symbol;

+ /// Used as a loop termination condition on bittree decoders and

+ /// direct bits decoder.

+ uint32_t limit;

+ /// Matched literal decoder: 0x100 or 0 to help avoiding branches.

+ /// Bittree reverse decoders: Offset of the next bit: 1 << offset

+ uint32_t offset;

+ /// If decoding a literal: match byte.

+ /// If decoding a match: length of the match.

+ uint32_t len;

+};

+static lzma_ret

+lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,

+ const uint8_t *restrict in,

+ size_t *restrict in_pos, size_t in_size)

+ ////////////////////

+ // Initialization //

+ ////////////////////

+ if (!rc_read_init(&coder->rc, in, in_pos, in_size))

+ return LZMA_OK;

+ ///////////////

+ // Variables //

+ ///////////////

+ // Making local copies of often-used variables improves both

+ // speed and readability.

+ lzma_dict dict = *dictptr;

+ const size_t dict_start = dict.pos;

+ // Range decoder

+ rc_to_local(coder->rc, *in_pos);

+ // State

+ uint32_t state = coder->state;

+ uint32_t rep0 = coder->rep0;

+ uint32_t rep1 = coder->rep1;

+ uint32_t rep2 = coder->rep2;

+ uint32_t rep3 = coder->rep3;

+ const uint32_t pos_mask = coder->pos_mask;

+ // These variables are actually needed only if we last time ran

+ // out of input in the middle of the decoder loop.

+ probability *probs = coder->probs;

+ uint32_t symbol = coder->symbol;

+ uint32_t limit = coder->limit;

+ uint32_t offset = coder->offset;

+ uint32_t len = coder->len;

+ const uint32_t literal_pos_mask = coder->literal_pos_mask;

+ const uint32_t literal_context_bits = coder->literal_context_bits;

+ // Temporary variables

+ uint32_t pos_state = dict.pos & pos_mask;

+ lzma_ret ret = LZMA_OK;

+ // If uncompressed size is known, there must be no end of payload

+ // marker.

+ const bool no_eopm = coder->uncompressed_size

+ != LZMA_VLI_UNKNOWN;

+ if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)

+ dict.limit = dict.pos + (size_t)(coder->uncompressed_size);

+ // The main decoder loop. The "switch" is used to restart the decoder at

+ // correct location. Once restarted, the "switch" is no longer used.

+ switch (coder->sequence)

+ while (true) {

+ // Calculate new pos_state. This is skipped on the first loop

+ // since we already calculated it when setting up the local

+ // variables.

+ pos_state = dict.pos & pos_mask;

+ case SEQ_NORMALIZE:

+ case SEQ_IS_MATCH:

+ if (unlikely(no_eopm && dict.pos == dict.limit))

+ break;

+ rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {

+ rc_update_0(coder->is_match[state][pos_state]);

+ // It's a literal i.e. a single 8-bit byte.

+ probs = literal_subcoder(coder->literal,

+ literal_context_bits, literal_pos_mask,

+ dict.pos, dict_get(&dict, 0));

+ symbol = 1;

+ if (is_literal_state(state)) {

+ // Decode literal without match byte.

+#ifdef HAVE_SMALL

+ case SEQ_LITERAL:

+ do {

+ rc_bit(probs[symbol], , , SEQ_LITERAL);

+ } while (symbol < (1 << 8));

+#else

+ rc_bit_case(probs[symbol], , , SEQ_LITERAL0);

+ rc_bit_case(probs[symbol], , , SEQ_LITERAL1);

+ rc_bit_case(probs[symbol], , , SEQ_LITERAL2);

+ rc_bit_case(probs[symbol], , , SEQ_LITERAL3);

+ rc_bit_case(probs[symbol], , , SEQ_LITERAL4);

+ rc_bit_case(probs[symbol], , , SEQ_LITERAL5);

+ rc_bit_case(probs[symbol], , , SEQ_LITERAL6);

+ rc_bit_case(probs[symbol], , , SEQ_LITERAL7);

+#endif

+ } else {

+ // Decode literal with match byte.

+ //

+ // We store the byte we compare against

+ // ("match byte") to "len" to minimize the

+ // number of variables we need to store

+ // between decoder calls.

+ len = dict_get(&dict, rep0) << 1;

+ // The usage of "offset" allows omitting some

+ // branches, which should give tiny speed

+ // improvement on some CPUs. "offset" gets

+ // set to zero if match_bit didn't match.

+ offset = 0x100;

+#ifdef HAVE_SMALL

+ case SEQ_LITERAL_MATCHED:

+ do {

+ const uint32_t match_bit

+ = len & offset;

+ const uint32_t subcoder_index

+ = offset + match_bit

+ + symbol;

+ rc_bit(probs[subcoder_index],

+ offset &= ~match_bit,

+ offset &= match_bit,

+ SEQ_LITERAL_MATCHED);

+ // It seems to be faster to do this

+ // here instead of putting it to the

+ // beginning of the loop and then

+ // putting the "case" in the middle

+ // of the loop.

+ len <<= 1;

+ } while (symbol < (1 << 8));

+#else

+ // Unroll the loop.

+ uint32_t match_bit;

+ uint32_t subcoder_index;

+# define d(seq) \

+ case seq: \

+ match_bit = len & offset; \

+ subcoder_index = offset + match_bit + symbol; \

+ rc_bit(probs[subcoder_index], \

+ offset &= ~match_bit, \

+ offset &= match_bit, \

+ seq)

+ d(SEQ_LITERAL_MATCHED0);

+ len <<= 1;

+ d(SEQ_LITERAL_MATCHED1);

+ len <<= 1;

+ d(SEQ_LITERAL_MATCHED2);

+ len <<= 1;

+ d(SEQ_LITERAL_MATCHED3);

+ len <<= 1;

+ d(SEQ_LITERAL_MATCHED4);

+ len <<= 1;

+ d(SEQ_LITERAL_MATCHED5);

+ len <<= 1;

+ d(SEQ_LITERAL_MATCHED6);

+ len <<= 1;

+ d(SEQ_LITERAL_MATCHED7);

+# undef d

+#endif

+ }

+ //update_literal(state);

+ // Use a lookup table to update to literal state,

+ // since compared to other state updates, this would

+ // need two branches.

+ static const lzma_lzma_state next_state[] = {

+ STATE_LIT_LIT,

+ STATE_MATCH_LIT_LIT,

+ STATE_REP_LIT_LIT,

+ STATE_SHORTREP_LIT_LIT,

+ STATE_MATCH_LIT,

+ STATE_REP_LIT,

+ STATE_SHORTREP_LIT,

+ STATE_MATCH_LIT,

+ STATE_REP_LIT

+ };

+ state = next_state[state];

+ case SEQ_LITERAL_WRITE:

+ if (unlikely(dict_put(&dict, symbol))) {

+ coder->sequence = SEQ_LITERAL_WRITE;

+ goto out;

+ }

+ continue;

+ }

+ // Instead of a new byte we are going to get a byte range

+ // (distance and length) which will be repeated from our

+ // output history.

+ rc_update_1(coder->is_match[state][pos_state]);

+ case SEQ_IS_REP:

+ rc_if_0(coder->is_rep[state], SEQ_IS_REP) {

+ // Not a repeated match

+ rc_update_0(coder->is_rep[state]);

+ update_match(state);

+ // The latest three match distances are kept in

+ // memory in case there are repeated matches.

+ rep3 = rep2;

+ rep2 = rep1;

+ rep1 = rep0;

+ // Decode the length of the match.

+ len_decode(len, coder->match_len_decoder,

+ pos_state, SEQ_MATCH_LEN);

+ // Prepare to decode the highest two bits of the

+ // match distance.

+ probs = coder->pos_slot[get_len_to_pos_state(len)];

+ symbol = 1;

+#ifdef HAVE_SMALL

+ case SEQ_POS_SLOT:

+ do {

+ rc_bit(probs[symbol], , , SEQ_POS_SLOT);

+ } while (symbol < POS_SLOTS);

+#else

+ rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0);

+ rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1);

+ rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2);

+ rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3);

+ rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4);

+ rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5);

+#endif

+ // Get rid of the highest bit that was needed for

+ // indexing of the probability array.

+ symbol -= POS_SLOTS;

+ assert(symbol <= 63);

+ if (symbol < START_POS_MODEL_INDEX) {

+ // Match distances [0, 3] have only two bits.

+ rep0 = symbol;

+ } else {

+ // Decode the lowest [1, 29] bits of

+ // the match distance.

+ limit = (symbol >> 1) - 1;

+ assert(limit >= 1 && limit <= 30);

+ rep0 = 2 + (symbol & 1);

+ if (symbol < END_POS_MODEL_INDEX) {

+ // Prepare to decode the low bits for

+ // a distance of [4, 127].

+ assert(limit <= 5);

+ rep0 <<= limit;

+ assert(rep0 <= 96);

+ // -1 is fine, because we start

+ // decoding at probs[1], not probs[0].

+ // NOTE: This violates the C standard,

+ // since we are doing pointer

+ // arithmetic past the beginning of

+ // the array.

+ assert((int32_t)(rep0 - symbol - 1)

+ >= -1);

+ assert((int32_t)(rep0 - symbol - 1)

+ <= 82);

+ probs = coder->pos_special + rep0

+ - symbol - 1;

+ symbol = 1;

+ offset = 0;

+ case SEQ_POS_MODEL:

+#ifdef HAVE_SMALL

+ do {

+ rc_bit(probs[symbol], ,

+ rep0 += 1 << offset,

+ SEQ_POS_MODEL);

+ } while (++offset < limit);

+#else

+ switch (limit) {

+ case 5:

+ assert(offset == 0);

+ rc_bit(probs[symbol], ,

+ rep0 += 1,

+ SEQ_POS_MODEL);

+ ++offset;

+ --limit;

+ case 4:

+ rc_bit(probs[symbol], ,

+ rep0 += 1 << offset,

+ SEQ_POS_MODEL);

+ ++offset;

+ --limit;

+ case 3:

+ rc_bit(probs[symbol], ,

+ rep0 += 1 << offset,

+ SEQ_POS_MODEL);

+ ++offset;

+ --limit;

+ case 2:

+ rc_bit(probs[symbol], ,

+ rep0 += 1 << offset,

+ SEQ_POS_MODEL);

+ ++offset;

+ --limit;

+ case 1:

+ // We need "symbol" only for

+ // indexing the probability

+ // array, thus we can use

+ // rc_bit_last() here to omit

+ // the unneeded updating of

+ // "symbol".

+ rc_bit_last(probs[symbol], ,

+ rep0 += 1 << offset,

+ SEQ_POS_MODEL);

+ }

+#endif

+ } else {

+ // The distance is >= 128. Decode the

+ // lower bits without probabilities

+ // except the lowest four bits.

+ assert(symbol >= 14);

+ assert(limit >= 6);

+ limit -= ALIGN_BITS;

+ assert(limit >= 2);

+ case SEQ_DIRECT:

+ // Not worth manual unrolling

+ do {

+ rc_direct(rep0, SEQ_DIRECT);

+ } while (--limit > 0);

+ // Decode the lowest four bits using

+ // probabilities.

+ rep0 <<= ALIGN_BITS;

+ symbol = 1;

+#ifdef HAVE_SMALL

+ offset = 0;

+ case SEQ_ALIGN:

+ do {

+ rc_bit(coder->pos_align[

+ symbol], ,

+ rep0 += 1 << offset,

+ SEQ_ALIGN);

+ } while (++offset < ALIGN_BITS);

+#else

+ case SEQ_ALIGN0:

+ rc_bit(coder->pos_align[symbol], ,

+ rep0 += 1, SEQ_ALIGN0);

+ case SEQ_ALIGN1:

+ rc_bit(coder->pos_align[symbol], ,

+ rep0 += 2, SEQ_ALIGN1);

+ case SEQ_ALIGN2:

+ rc_bit(coder->pos_align[symbol], ,

+ rep0 += 4, SEQ_ALIGN2);

+ case SEQ_ALIGN3:

+ // Like in SEQ_POS_MODEL, we don't

+ // need "symbol" for anything else

+ // than indexing the probability array.

+ rc_bit_last(coder->pos_align[symbol], ,

+ rep0 += 8, SEQ_ALIGN3);

+#endif

+ if (rep0 == UINT32_MAX) {

+ // End of payload marker was

+ // found. It must not be

+ // present if uncompressed

+ // size is known.

+ if (coder->uncompressed_size

+ != LZMA_VLI_UNKNOWN) {

+ ret = LZMA_DATA_ERROR;

+ goto out;

+ }

+ case SEQ_EOPM:

+ // TODO Comment

+ rc_normalize(SEQ_EOPM);

+ ret = LZMA_STREAM_END;

+ goto out;

+ }

+ // Validate the distance we just decoded.

+ if (unlikely(!dict_is_distance_valid(&dict, rep0))) {

+ ret = LZMA_DATA_ERROR;

+ goto out;

+ }

+ } else {

+ rc_update_1(coder->is_rep[state]);

+ // Repeated match

+ //

+ // The match distance is a value that we have had

+ // earlier. The latest four match distances are

+ // available as rep0, rep1, rep2 and rep3. We will

+ // now decode which of them is the new distance.

+ //

+ // There cannot be a match if we haven't produced

+ // any output, so check that first.

+ if (unlikely(!dict_is_distance_valid(&dict, 0))) {

+ ret = LZMA_DATA_ERROR;

+ goto out;

+ }

+ case SEQ_IS_REP0:

+ rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {

+ rc_update_0(coder->is_rep0[state]);

+ // The distance is rep0.

+ case SEQ_IS_REP0_LONG:

+ rc_if_0(coder->is_rep0_long[state][pos_state],

+ SEQ_IS_REP0_LONG) {

+ rc_update_0(coder->is_rep0_long[

+ state][pos_state]);

+ update_short_rep(state);

+ case SEQ_SHORTREP:

+ if (unlikely(dict_put(&dict, dict_get(

+ &dict, rep0)))) {

+ coder->sequence = SEQ_SHORTREP;

+ goto out;

+ }

+ continue;

+ }

+ // Repeating more than one byte at

+ // distance of rep0.

+ rc_update_1(coder->is_rep0_long[

+ state][pos_state]);

+ } else {

+ rc_update_1(coder->is_rep0[state]);

+ case SEQ_IS_REP1:

+ // The distance is rep1, rep2 or rep3. Once

+ // we find out which one of these three, it

+ // is stored to rep0 and rep1, rep2 and rep3

+ // are updated accordingly.

+ rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {

+ rc_update_0(coder->is_rep1[state]);

+ const uint32_t distance = rep1;

+ rep1 = rep0;

+ rep0 = distance;

+ } else {

+ rc_update_1(coder->is_rep1[state]);

+ case SEQ_IS_REP2:

+ rc_if_0(coder->is_rep2[state],

+ SEQ_IS_REP2) {

+ rc_update_0(coder->is_rep2[

+ state]);

+ const uint32_t distance = rep2;

+ rep2 = rep1;

+ rep1 = rep0;

+ rep0 = distance;

+ } else {

+ rc_update_1(coder->is_rep2[

+ state]);

+ const uint32_t distance = rep3;

+ rep3 = rep2;

+ rep2 = rep1;

+ rep1 = rep0;

+ rep0 = distance;

+ }

+ update_long_rep(state);

+ // Decode the length of the repeated match.

+ len_decode(len, coder->rep_len_decoder,

+ pos_state, SEQ_REP_LEN);

+ }

+ /////////////////////////////////

+ // Repeat from history buffer. //

+ /////////////////////////////////

+ // The length is always between these limits. There is no way

+ // to trigger the algorithm to set len outside this range.

+ assert(len >= MATCH_LEN_MIN);

+ assert(len <= MATCH_LEN_MAX);

+ case SEQ_COPY:

+ // Repeat len bytes from distance of rep0.

+ if (unlikely(dict_repeat(&dict, rep0, &len))) {

+ coder->sequence = SEQ_COPY;

+ goto out;

+ }

+ rc_normalize(SEQ_NORMALIZE);

+ coder->sequence = SEQ_IS_MATCH;

+out:

+ // Save state

+ // NOTE: Must not copy dict.limit.

+ dictptr->pos = dict.pos;

+ dictptr->full = dict.full;

+ rc_from_local(coder->rc, *in_pos);

+ coder->state = state;

+ coder->rep0 = rep0;

+ coder->rep1 = rep1;

+ coder->rep2 = rep2;

+ coder->rep3 = rep3;

+ coder->probs = probs;

+ coder->symbol = symbol;

+ coder->limit = limit;

+ coder->offset = offset;

+ coder->len = len;

+ // Update the remaining amount of uncompressed data if uncompressed

+ // size was known.

+ if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {

+ coder->uncompressed_size -= dict.pos - dict_start;

+ // Since there cannot be end of payload marker if the

+ // uncompressed size was known, we check here if we

+ // finished decoding.

+ if (coder->uncompressed_size == 0 && ret == LZMA_OK

+ && coder->sequence != SEQ_NORMALIZE)

+ ret = coder->sequence == SEQ_IS_MATCH

+ ? LZMA_STREAM_END : LZMA_DATA_ERROR;

+ }

+ // We can do an additional check in the range decoder to catch some

+ // corrupted files.

+ if (ret == LZMA_STREAM_END) {

+ if (!rc_is_finished(coder->rc))

+ ret = LZMA_DATA_ERROR;

+ // Reset the range decoder so that it is ready to reinitialize

+ // for a new LZMA2 chunk.

+ rc_reset(coder->rc);

+ }

+ return ret;

+static void

+lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size)

+ coder->uncompressed_size = uncompressed_size;

+/*

+extern void

+lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)

+ // This is hack.

+ (*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size;

+*/

+static void

+lzma_decoder_reset(lzma_coder *coder, const void *opt)

+ const lzma_options_lzma *options = opt;

+ // NOTE: We assume that lc/lp/pb are valid since they were

+ // successfully decoded with lzma_lzma_decode_properties().

+ // FIXME?

+ // Calculate pos_mask. We don't need pos_bits as is for anything.

+ coder->pos_mask = (1U << options->pb) - 1;

+ // Initialize the literal decoder.

+ literal_init(coder->literal, options->lc, options->lp);

+ coder->literal_context_bits = options->lc;

+ coder->literal_pos_mask = (1U << options->lp) - 1;

+ // State

+ coder->state = STATE_LIT_LIT;

+ coder->rep0 = 0;

+ coder->rep1 = 0;

+ coder->rep2 = 0;

+ coder->rep3 = 0;

+ coder->pos_mask = (1U << options->pb) - 1;

+ // Range decoder

+ rc_reset(coder->rc);

+ // Bit and bittree decoders

+ for (uint32_t i = 0; i < STATES; ++i) {

+ for (uint32_t j = 0; j <= coder->pos_mask; ++j) {

+ bit_reset(coder->is_match[i][j]);

+ bit_reset(coder->is_rep0_long[i][j]);

+ }

+ bit_reset(coder->is_rep[i]);

+ bit_reset(coder->is_rep0[i]);

+ bit_reset(coder->is_rep1[i]);

+ bit_reset(coder->is_rep2[i]);

+ }

+ for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i)

+ bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);

+ for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)

+ bit_reset(coder->pos_special[i]);

+ bittree_reset(coder->pos_align, ALIGN_BITS);

+ // Len decoders (also bit/bittree)

+ const uint32_t num_pos_states = 1U << options->pb;

+ bit_reset(coder->match_len_decoder.choice);

+ bit_reset(coder->match_len_decoder.choice2);

+ bit_reset(coder->rep_len_decoder.choice);

+ bit_reset(coder->rep_len_decoder.choice2);

+ for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {

+ bittree_reset(coder->match_len_decoder.low[pos_state],

+ LEN_LOW_BITS);

+ bittree_reset(coder->match_len_decoder.mid[pos_state],

+ LEN_MID_BITS);

+ bittree_reset(coder->rep_len_decoder.low[pos_state],

+ LEN_LOW_BITS);

+ bittree_reset(coder->rep_len_decoder.mid[pos_state],

+ LEN_MID_BITS);

+ }

+ bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);

+ bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);

+ coder->sequence = SEQ_IS_MATCH;

+ coder->probs = NULL;

+ coder->symbol = 0;

+ coder->limit = 0;

+ coder->offset = 0;

+ coder->len = 0;

+ return;

+extern lzma_ret

+lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,

+ const void *opt, lzma_lz_options *lz_options)

+ if (lz->coder == NULL) {

+ lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);

+ if (lz->coder == NULL)

+ return LZMA_MEM_ERROR;

+ lz->code = &lzma_decode;

+ lz->reset = &lzma_decoder_reset;

+ lz->set_uncompressed = &lzma_decoder_uncompressed;

+ }

+ // All dictionary sizes are OK here. LZ decoder will take care of

+ // the special cases.

+ const lzma_options_lzma *options = opt;

+ lz_options->dict_size = options->dict_size;

+ lz_options->preset_dict = options->preset_dict;

+ lz_options->preset_dict_size = options->preset_dict_size;

+ return LZMA_OK;

+/// Allocate and initialize LZMA decoder. This is used only via LZ

+/// initialization (lzma_lzma_decoder_init() passes function pointer to

+/// the LZ initialization).

+static lzma_ret

+lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,

+ const void *options, lzma_lz_options *lz_options)

+ if (!is_lclppb_valid(options))

+ return LZMA_PROG_ERROR;

+ return_if_error(lzma_lzma_decoder_create(

+ lz, allocator, options, lz_options));

+ lzma_decoder_reset(lz->coder, options);

+ lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);

+ return LZMA_OK;

+extern lzma_ret

+lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,

+ const lzma_filter_info *filters)

+ // LZMA can only be the last filter in the chain. This is enforced

+ // by the raw_decoder initialization.

+ assert(filters[1].init == NULL);

+ return lzma_lz_decoder_init(next, allocator, filters,

+ &lzma_decoder_init);

+extern bool

+lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)

+ if (byte > (4 * 5 + 4) * 9 + 8)

+ return true;

+ // See the file format specification to understand this.

+ options->pb = byte / (9 * 5);

+ byte -= options->pb * 9 * 5;

+ options->lp = byte / 9;

+ options->lc = byte - options->lp * 9;

+ return options->lc + options->lp > LZMA_LCLP_MAX;

+extern uint64_t

+lzma_lzma_decoder_memusage_nocheck(const void *options)

+ const lzma_options_lzma *const opt = options;

+ return sizeof(lzma_coder) + lzma_lz_decoder_memusage(opt->dict_size);

+extern uint64_t

+lzma_lzma_decoder_memusage(const void *options)

+ if (!is_lclppb_valid(options))

+ return UINT64_MAX;

+ return lzma_lzma_decoder_memusage_nocheck(options);

+extern lzma_ret

+lzma_lzma_props_decode(void **options, lzma_allocator *allocator,

+ const uint8_t *props, size_t props_size)

+ if (props_size != 5)

+ return LZMA_OPTIONS_ERROR;

+ lzma_options_lzma *opt

+ = lzma_alloc(sizeof(lzma_options_lzma), allocator);

+ if (opt == NULL)

+ return LZMA_MEM_ERROR;

+ if (lzma_lzma_lclppb_decode(opt, props[0]))

+ goto error;

+ // All dictionary sizes are accepted, including zero. LZ decoder

+ // will automatically use a dictionary at least a few KiB even if

+ // a smaller dictionary is requested.

+ opt->dict_size = unaligned_read32le(props + 1);

+ opt->preset_dict = NULL;

+ opt->preset_dict_size = 0;

+ *options = opt;

+ return LZMA_OK;

+error:

+ lzma_free(opt, allocator);

+ return LZMA_OPTIONS_ERROR;

Property changes on: xz/src/liblzma/lzma/lzma_decoder.c

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « xz/src/liblzma/lzma/lzma_decoder.h ('k') | xz/src/liblzma/lzma/lzma_encoder.h » ('j') | no next file with comments »