xz/src/liblzma/lzma/lzma_decoder.c - Issue 2869016: Add an unpatched version of xz, XZ Utils, to /trunk/deps/third_party

Side by Side Diff: xz/src/liblzma/lzma/lzma_decoder.c

Issue 2869016: Add an unpatched version of xz, XZ Utils, to /trunk/deps/third_party (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/

Patch Set: Created 10 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 ///////////////////////////////////////////////////////////////////////////////

	2 //

	3 /// \file lzma_decoder.c

	4 /// \brief LZMA decoder

	5 ///

	6 // Authors: Igor Pavlov

	7 // Lasse Collin

	8 //

	9 // This file has been put into the public domain.

	10 // You can do whatever you want with this file.

	11 //

	12 ///////////////////////////////////////////////////////////////////////////////

	13

	14 #include "lz_decoder.h"

	15 #include "lzma_common.h"

	16 #include "lzma_decoder.h"

	17 #include "range_decoder.h"

	18

	19

	20 #ifdef HAVE_SMALL

	21

	22 // Macros for (somewhat) size-optimized code.

	23 #define seq_4(seq) seq

	24

	25 #define seq_6(seq) seq

	26

	27 #define seq_8(seq) seq

	28

	29 #define seq_len(seq) \

	30 seq ## _CHOICE, \

	31 seq ## _CHOICE2, \

	32 seq ## _BITTREE

	33

	34 #define len_decode(target, ld, pos_state, seq) \

	35 do { \

	36 case seq ## _CHOICE: \

	37 rc_if_0(ld.choice, seq ## _CHOICE) { \

	38 rc_update_0(ld.choice); \

	39 probs = ld.low[pos_state];\

	40 limit = LEN_LOW_SYMBOLS; \

	41 target = MATCH_LEN_MIN; \

	42 } else { \

	43 rc_update_1(ld.choice); \

	44 case seq ## _CHOICE2: \

	45 rc_if_0(ld.choice2, seq ## _CHOICE2) { \

	46 rc_update_0(ld.choice2); \

	47 probs = ld.mid[pos_state]; \

	48 limit = LEN_MID_SYMBOLS; \

	49 target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \

	50 } else { \

	51 rc_update_1(ld.choice2); \

	52 probs = ld.high; \

	53 limit = LEN_HIGH_SYMBOLS; \

	54 target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \

	55 + LEN_MID_SYMBOLS; \

	56 } \

	57 } \

	58 symbol = 1; \

	59 case seq ## _BITTREE: \

	60 do { \

	61 rc_bit(probs[symbol], , , seq ## _BITTREE); \

	62 } while (symbol < limit); \

	63 target += symbol - limit; \

	64 } while (0)

	65

	66 #else // HAVE_SMALL

	67

	68 // Unrolled versions

	69 #define seq_4(seq) \

	70 seq ## 0, \

	71 seq ## 1, \

	72 seq ## 2, \

	73 seq ## 3

	74

	75 #define seq_6(seq) \

	76 seq ## 0, \

	77 seq ## 1, \

	78 seq ## 2, \

	79 seq ## 3, \

	80 seq ## 4, \

	81 seq ## 5

	82

	83 #define seq_8(seq) \

	84 seq ## 0, \

	85 seq ## 1, \

	86 seq ## 2, \

	87 seq ## 3, \

	88 seq ## 4, \

	89 seq ## 5, \

	90 seq ## 6, \

	91 seq ## 7

	92

	93 #define seq_len(seq) \

	94 seq ## _CHOICE, \

	95 seq ## _LOW0, \

	96 seq ## _LOW1, \

	97 seq ## _LOW2, \

	98 seq ## _CHOICE2, \

	99 seq ## _MID0, \

	100 seq ## _MID1, \

	101 seq ## _MID2, \

	102 seq ## _HIGH0, \

	103 seq ## _HIGH1, \

	104 seq ## _HIGH2, \

	105 seq ## _HIGH3, \

	106 seq ## _HIGH4, \

	107 seq ## _HIGH5, \

	108 seq ## _HIGH6, \

	109 seq ## _HIGH7

	110

	111 #define len_decode(target, ld, pos_state, seq) \

	112 do { \

	113 symbol = 1; \

	114 case seq ## _CHOICE: \

	115 rc_if_0(ld.choice, seq ## _CHOICE) { \

	116 rc_update_0(ld.choice); \

	117 rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \

	118 rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \

	119 rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \

	120 target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \

	121 } else { \

	122 rc_update_1(ld.choice); \

	123 case seq ## _CHOICE2: \

	124 rc_if_0(ld.choice2, seq ## _CHOICE2) { \

	125 rc_update_0(ld.choice2); \

	126 rc_bit_case(ld.mid[pos_state][symbol], , , \

	127 seq ## _MID0); \

	128 rc_bit_case(ld.mid[pos_state][symbol], , , \

	129 seq ## _MID1); \

	130 rc_bit_case(ld.mid[pos_state][symbol], , , \

	131 seq ## _MID2); \

	132 target = symbol - LEN_MID_SYMBOLS \

	133 + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \

	134 } else { \

	135 rc_update_1(ld.choice2); \

	136 rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \

	137 rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \

	138 rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \

	139 rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \

	140 rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \

	141 rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \

	142 rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \

	143 rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \

	144 target = symbol - LEN_HIGH_SYMBOLS \

	145 + MATCH_LEN_MIN \

	146 + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \

	147 } \

	148 } \

	149 } while (0)

	150

	151 #endif // HAVE_SMALL

	152

	153

	154 /// Length decoder probabilities; see comments in lzma_common.h.

	155 typedef struct {

	156 probability choice;

	157 probability choice2;

	158 probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];

	159 probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];

	160 probability high[LEN_HIGH_SYMBOLS];

	161 } lzma_length_decoder;

	162

	163

	164 struct lzma_coder_s {

	165 ///////////////////

	166 // Probabilities //

	167 ///////////////////

	168

	169 /// Literals; see comments in lzma_common.h.

	170 probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];

	171

	172 /// If 1, it's a match. Otherwise it's a single 8-bit literal.

	173 probability is_match[STATES][POS_STATES_MAX];

	174

	175 /// If 1, it's a repeated match. The distance is one of rep0 .. rep3.

	176 probability is_rep[STATES];

	177

	178 /// If 0, distance of a repeated match is rep0.

	179 /// Otherwise check is_rep1.

	180 probability is_rep0[STATES];

	181

	182 /// If 0, distance of a repeated match is rep1.

	183 /// Otherwise check is_rep2.

	184 probability is_rep1[STATES];

	185

	186 /// If 0, distance of a repeated match is rep2. Otherwise it is rep3.

	187 probability is_rep2[STATES];

	188

	189 /// If 1, the repeated match has length of one byte. Otherwise

	190 /// the length is decoded from rep_len_decoder.

	191 probability is_rep0_long[STATES][POS_STATES_MAX];

	192

	193 /// Probability tree for the highest two bits of the match distance.

	194 /// There is a separate probability tree for match lengths of

	195 /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].

	196 probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS];

	197

	198 /// Probability trees for additional bits for match distance when the

	199 /// distance is in the range [4, 127].

	200 probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX];

	201

	202 /// Probability tree for the lowest four bits of a match distance

	203 /// that is equal to or greater than 128.

	204 probability pos_align[ALIGN_TABLE_SIZE];

	205

	206 /// Length of a normal match

	207 lzma_length_decoder match_len_decoder;

	208

	209 /// Length of a repeated match

	210 lzma_length_decoder rep_len_decoder;

	211

	212 ///////////////////

	213 // Decoder state //

	214 ///////////////////

	215

	216 // Range coder

	217 lzma_range_decoder rc;

	218

	219 // Types of the most recently seen LZMA symbols

	220 lzma_lzma_state state;

	221

	222 uint32_t rep0; ///< Distance of the latest match

	223 uint32_t rep1; ///< Distance of second latest match

	224 uint32_t rep2; ///< Distance of third latest match

	225 uint32_t rep3; ///< Distance of fourth latest match

	226

	227 uint32_t pos_mask; // (1U << pb) - 1

	228 uint32_t literal_context_bits;

	229 uint32_t literal_pos_mask;

	230

	231 /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of

	232 /// payload marker is expected.

	233 lzma_vli uncompressed_size;

	234

	235 ////////////////////////////////

	236 // State of incomplete symbol //

	237 ////////////////////////////////

	238

	239 /// Position where to continue the decoder loop

	240 enum {

	241 SEQ_NORMALIZE,

	242 SEQ_IS_MATCH,

	243 seq_8(SEQ_LITERAL),

	244 seq_8(SEQ_LITERAL_MATCHED),

	245 SEQ_LITERAL_WRITE,

	246 SEQ_IS_REP,

	247 seq_len(SEQ_MATCH_LEN),

	248 seq_6(SEQ_POS_SLOT),

	249 SEQ_POS_MODEL,

	250 SEQ_DIRECT,

	251 seq_4(SEQ_ALIGN),

	252 SEQ_EOPM,

	253 SEQ_IS_REP0,

	254 SEQ_SHORTREP,

	255 SEQ_IS_REP0_LONG,

	256 SEQ_IS_REP1,

	257 SEQ_IS_REP2,

	258 seq_len(SEQ_REP_LEN),

	259 SEQ_COPY,

	260 } sequence;

	261

	262 /// Base of the current probability tree

	263 probability *probs;

	264

	265 /// Symbol being decoded. This is also used as an index variable in

	266 /// bittree decoders: probs[symbol]

	267 uint32_t symbol;

	268

	269 /// Used as a loop termination condition on bittree decoders and

	270 /// direct bits decoder.

	271 uint32_t limit;

	272

	273 /// Matched literal decoder: 0x100 or 0 to help avoiding branches.

	274 /// Bittree reverse decoders: Offset of the next bit: 1 << offset

	275 uint32_t offset;

	276

	277 /// If decoding a literal: match byte.

	278 /// If decoding a match: length of the match.

	279 uint32_t len;

	280 };

	281

	282

	283 static lzma_ret

	284 lzma_decode(lzma_coder restrict coder, lzma_dict restrict dictptr,

	285 const uint8_t *restrict in,

	286 size_t *restrict in_pos, size_t in_size)

	287 {

	288 ////////////////////

	289 // Initialization //

	290 ////////////////////

	291

	292 if (!rc_read_init(&coder->rc, in, in_pos, in_size))

	293 return LZMA_OK;

	294

	295 ///////////////

	296 // Variables //

	297 ///////////////

	298

	299 // Making local copies of often-used variables improves both

	300 // speed and readability.

	301

	302 lzma_dict dict = *dictptr;

	303

	304 const size_t dict_start = dict.pos;

	305

	306 // Range decoder

	307 rc_to_local(coder->rc, *in_pos);

	308

	309 // State

	310 uint32_t state = coder->state;

	311 uint32_t rep0 = coder->rep0;

	312 uint32_t rep1 = coder->rep1;

	313 uint32_t rep2 = coder->rep2;

	314 uint32_t rep3 = coder->rep3;

	315

	316 const uint32_t pos_mask = coder->pos_mask;

	317

	318 // These variables are actually needed only if we last time ran

	319 // out of input in the middle of the decoder loop.

	320 probability *probs = coder->probs;

	321 uint32_t symbol = coder->symbol;

	322 uint32_t limit = coder->limit;

	323 uint32_t offset = coder->offset;

	324 uint32_t len = coder->len;

	325

	326 const uint32_t literal_pos_mask = coder->literal_pos_mask;

	327 const uint32_t literal_context_bits = coder->literal_context_bits;

	328

	329 // Temporary variables

	330 uint32_t pos_state = dict.pos & pos_mask;

	331

	332 lzma_ret ret = LZMA_OK;

	333

	334 // If uncompressed size is known, there must be no end of payload

	335 // marker.

	336 const bool no_eopm = coder->uncompressed_size

	337 != LZMA_VLI_UNKNOWN;

	338 if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)

	339 dict.limit = dict.pos + (size_t)(coder->uncompressed_size);

	340

	341 // The main decoder loop. The "switch" is used to restart the decoder at

	342 // correct location. Once restarted, the "switch" is no longer used.

	343 switch (coder->sequence)

	344 while (true) {

	345 // Calculate new pos_state. This is skipped on the first loop

	346 // since we already calculated it when setting up the local

	347 // variables.

	348 pos_state = dict.pos & pos_mask;

	349

	350 case SEQ_NORMALIZE:

	351 case SEQ_IS_MATCH:

	352 if (unlikely(no_eopm && dict.pos == dict.limit))

	353 break;

	354

	355 rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {

	356 rc_update_0(coder->is_match[state][pos_state]);

	357

	358 // It's a literal i.e. a single 8-bit byte.

	359

	360 probs = literal_subcoder(coder->literal,

	361 literal_context_bits, literal_pos_mask,

	362 dict.pos, dict_get(&dict, 0));

	363 symbol = 1;

	364

	365 if (is_literal_state(state)) {

	366 // Decode literal without match byte.

	367 #ifdef HAVE_SMALL

	368 case SEQ_LITERAL:

	369 do {

	370 rc_bit(probs[symbol], , , SEQ_LITERAL);

	371 } while (symbol < (1 << 8));

	372 #else

	373 rc_bit_case(probs[symbol], , , SEQ_LITERAL0);

	374 rc_bit_case(probs[symbol], , , SEQ_LITERAL1);

	375 rc_bit_case(probs[symbol], , , SEQ_LITERAL2);

	376 rc_bit_case(probs[symbol], , , SEQ_LITERAL3);

	377 rc_bit_case(probs[symbol], , , SEQ_LITERAL4);

	378 rc_bit_case(probs[symbol], , , SEQ_LITERAL5);

	379 rc_bit_case(probs[symbol], , , SEQ_LITERAL6);

	380 rc_bit_case(probs[symbol], , , SEQ_LITERAL7);

	381 #endif

	382 } else {

	383 // Decode literal with match byte.

	384 //

	385 // We store the byte we compare against

	386 // ("match byte") to "len" to minimize the

	387 // number of variables we need to store

	388 // between decoder calls.

	389 len = dict_get(&dict, rep0) << 1;

	390

	391 // The usage of "offset" allows omitting some

	392 // branches, which should give tiny speed

	393 // improvement on some CPUs. "offset" gets

	394 // set to zero if match_bit didn't match.

	395 offset = 0x100;

	396

	397 #ifdef HAVE_SMALL

	398 case SEQ_LITERAL_MATCHED:

	399 do {

	400 const uint32_t match_bit

	401 = len & offset;

	402 const uint32_t subcoder_index

	403 = offset + match_bit

	404 + symbol;

	405

	406 rc_bit(probs[subcoder_index],

	407 offset &= ~match_bit,

	408 offset &= match_bit,

	409 SEQ_LITERAL_MATCHED);

	410

	411 // It seems to be faster to do this

	412 // here instead of putting it to the

	413 // beginning of the loop and then

	414 // putting the "case" in the middle

	415 // of the loop.

	416 len <<= 1;

	417

	418 } while (symbol < (1 << 8));

	419 #else

	420 // Unroll the loop.

	421 uint32_t match_bit;

	422 uint32_t subcoder_index;

	423

	424 # define d(seq) \

	425 case seq: \

	426 match_bit = len & offset; \

	427 subcoder_index = offset + match_bit + symbol; \

	428 rc_bit(probs[subcoder_index], \

	429 offset &= ~match_bit, \

	430 offset &= match_bit, \

	431 seq)

	432

	433 d(SEQ_LITERAL_MATCHED0);

	434 len <<= 1;

	435 d(SEQ_LITERAL_MATCHED1);

	436 len <<= 1;

	437 d(SEQ_LITERAL_MATCHED2);

	438 len <<= 1;

	439 d(SEQ_LITERAL_MATCHED3);

	440 len <<= 1;

	441 d(SEQ_LITERAL_MATCHED4);

	442 len <<= 1;

	443 d(SEQ_LITERAL_MATCHED5);

	444 len <<= 1;

	445 d(SEQ_LITERAL_MATCHED6);

	446 len <<= 1;

	447 d(SEQ_LITERAL_MATCHED7);

	448 # undef d

	449 #endif

	450 }

	451

	452 //update_literal(state);

	453 // Use a lookup table to update to literal state,

	454 // since compared to other state updates, this would

	455 // need two branches.

	456 static const lzma_lzma_state next_state[] = {

	457 STATE_LIT_LIT,

	458 STATE_LIT_LIT,

	459 STATE_LIT_LIT,

	460 STATE_LIT_LIT,

	461 STATE_MATCH_LIT_LIT,

	462 STATE_REP_LIT_LIT,

	463 STATE_SHORTREP_LIT_LIT,

	464 STATE_MATCH_LIT,

	465 STATE_REP_LIT,

	466 STATE_SHORTREP_LIT,

	467 STATE_MATCH_LIT,

	468 STATE_REP_LIT

	469 };

	470 state = next_state[state];

	471

	472 case SEQ_LITERAL_WRITE:

	473 if (unlikely(dict_put(&dict, symbol))) {

	474 coder->sequence = SEQ_LITERAL_WRITE;

	475 goto out;

	476 }

	477

	478 continue;

	479 }

	480

	481 // Instead of a new byte we are going to get a byte range

	482 // (distance and length) which will be repeated from our

	483 // output history.

	484

	485 rc_update_1(coder->is_match[state][pos_state]);

	486

	487 case SEQ_IS_REP:

	488 rc_if_0(coder->is_rep[state], SEQ_IS_REP) {

	489 // Not a repeated match

	490 rc_update_0(coder->is_rep[state]);

	491 update_match(state);

	492

	493 // The latest three match distances are kept in

	494 // memory in case there are repeated matches.

	495 rep3 = rep2;

	496 rep2 = rep1;

	497 rep1 = rep0;

	498

	499 // Decode the length of the match.

	500 len_decode(len, coder->match_len_decoder,

	501 pos_state, SEQ_MATCH_LEN);

	502

	503 // Prepare to decode the highest two bits of the

	504 // match distance.

	505 probs = coder->pos_slot[get_len_to_pos_state(len)];

	506 symbol = 1;

	507

	508 #ifdef HAVE_SMALL

	509 case SEQ_POS_SLOT:

	510 do {

	511 rc_bit(probs[symbol], , , SEQ_POS_SLOT);

	512 } while (symbol < POS_SLOTS);

	513 #else

	514 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0);

	515 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1);

	516 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2);

	517 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3);

	518 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4);

	519 rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5);

	520 #endif

	521 // Get rid of the highest bit that was needed for

	522 // indexing of the probability array.

	523 symbol -= POS_SLOTS;

	524 assert(symbol <= 63);

	525

	526 if (symbol < START_POS_MODEL_INDEX) {

	527 // Match distances [0, 3] have only two bits.

	528 rep0 = symbol;

	529 } else {

	530 // Decode the lowest [1, 29] bits of

	531 // the match distance.

	532 limit = (symbol >> 1) - 1;

	533 assert(limit >= 1 && limit <= 30);

	534 rep0 = 2 + (symbol & 1);

	535

	536 if (symbol < END_POS_MODEL_INDEX) {

	537 // Prepare to decode the low bits for

	538 // a distance of [4, 127].

	539 assert(limit <= 5);

	540 rep0 <<= limit;

	541 assert(rep0 <= 96);

	542 // -1 is fine, because we start

	543 // decoding at probs[1], not probs[0].

	544 // NOTE: This violates the C standard,

	545 // since we are doing pointer

	546 // arithmetic past the beginning of

	547 // the array.

	548 assert((int32_t)(rep0 - symbol - 1)

	549 >= -1);

	550 assert((int32_t)(rep0 - symbol - 1)

	551 <= 82);

	552 probs = coder->pos_special + rep0

	553 - symbol - 1;

	554 symbol = 1;

	555 offset = 0;

	556 case SEQ_POS_MODEL:

	557 #ifdef HAVE_SMALL

	558 do {

	559 rc_bit(probs[symbol], ,

	560 rep0 += 1 << offset,

	561 SEQ_POS_MODEL);

	562 } while (++offset < limit);

	563 #else

	564 switch (limit) {

	565 case 5:

	566 assert(offset == 0);

	567 rc_bit(probs[symbol], ,

	568 rep0 += 1,

	569 SEQ_POS_MODEL);

	570 ++offset;

	571 --limit;

	572 case 4:

	573 rc_bit(probs[symbol], ,

	574 rep0 += 1 << offset,

	575 SEQ_POS_MODEL);

	576 ++offset;

	577 --limit;

	578 case 3:

	579 rc_bit(probs[symbol], ,

	580 rep0 += 1 << offset,

	581 SEQ_POS_MODEL);

	582 ++offset;

	583 --limit;

	584 case 2:

	585 rc_bit(probs[symbol], ,

	586 rep0 += 1 << offset,

	587 SEQ_POS_MODEL);

	588 ++offset;

	589 --limit;

	590 case 1:

	591 // We need "symbol" only for

	592 // indexing the probability

	593 // array, thus we can use

	594 // rc_bit_last() here to omit

	595 // the unneeded updating of

	596 // "symbol".

	597 rc_bit_last(probs[symbol], ,

	598 rep0 += 1 << offset,

	599 SEQ_POS_MODEL);

	600 }

	601 #endif

	602 } else {

	603 // The distance is >= 128. Decode the

	604 // lower bits without probabilities

	605 // except the lowest four bits.

	606 assert(symbol >= 14);

	607 assert(limit >= 6);

	608 limit -= ALIGN_BITS;

	609 assert(limit >= 2);

	610 case SEQ_DIRECT:

	611 // Not worth manual unrolling

	612 do {

	613 rc_direct(rep0, SEQ_DIRECT);

	614 } while (--limit > 0);

	615

	616 // Decode the lowest four bits using

	617 // probabilities.

	618 rep0 <<= ALIGN_BITS;

	619 symbol = 1;

	620 #ifdef HAVE_SMALL

	621 offset = 0;

	622 case SEQ_ALIGN:

	623 do {

	624 rc_bit(coder->pos_align[

	625 symbol], ,

	626 rep0 += 1 << offset,

	627 SEQ_ALIGN);

	628 } while (++offset < ALIGN_BITS);

	629 #else

	630 case SEQ_ALIGN0:

	631 rc_bit(coder->pos_align[symbol], ,

	632 rep0 += 1, SEQ_ALIGN0);

	633 case SEQ_ALIGN1:

	634 rc_bit(coder->pos_align[symbol], ,

	635 rep0 += 2, SEQ_ALIGN1);

	636 case SEQ_ALIGN2:

	637 rc_bit(coder->pos_align[symbol], ,

	638 rep0 += 4, SEQ_ALIGN2);

	639 case SEQ_ALIGN3:

	640 // Like in SEQ_POS_MODEL, we don't

	641 // need "symbol" for anything else

	642 // than indexing the probability array.

	643 rc_bit_last(coder->pos_align[symbol], ,

	644 rep0 += 8, SEQ_ALIGN3);

	645 #endif

	646

	647 if (rep0 == UINT32_MAX) {

	648 // End of payload marker was

	649 // found. It must not be

	650 // present if uncompressed

	651 // size is known.

	652 if (coder->uncompressed_size

	653 != LZMA_VLI_UNKNOWN) {

	654 ret = LZMA_DATA_ERROR;

	655 goto out;

	656 }

	657

	658 case SEQ_EOPM:

	659 // TODO Comment

	660 rc_normalize(SEQ_EOPM);

	661 ret = LZMA_STREAM_END;

	662 goto out;

	663 }

	664 }

	665 }

	666

	667 // Validate the distance we just decoded.

	668 if (unlikely(!dict_is_distance_valid(&dict, rep0))) {

	669 ret = LZMA_DATA_ERROR;

	670 goto out;

	671 }

	672

	673 } else {

	674 rc_update_1(coder->is_rep[state]);

	675

	676 // Repeated match

	677 //

	678 // The match distance is a value that we have had

	679 // earlier. The latest four match distances are

	680 // available as rep0, rep1, rep2 and rep3. We will

	681 // now decode which of them is the new distance.

	682 //

	683 // There cannot be a match if we haven't produced

	684 // any output, so check that first.

	685 if (unlikely(!dict_is_distance_valid(&dict, 0))) {

	686 ret = LZMA_DATA_ERROR;

	687 goto out;

	688 }

	689

	690 case SEQ_IS_REP0:

	691 rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {

	692 rc_update_0(coder->is_rep0[state]);

	693 // The distance is rep0.

	694

	695 case SEQ_IS_REP0_LONG:

	696 rc_if_0(coder->is_rep0_long[state][pos_state],

	697 SEQ_IS_REP0_LONG) {

	698 rc_update_0(coder->is_rep0_long[

	699 state][pos_state]);

	700

	701 update_short_rep(state);

	702

	703 case SEQ_SHORTREP:

	704 if (unlikely(dict_put(&dict, dict_get(

	705 &dict, rep0)))) {

	706 coder->sequence = SEQ_SHORTREP;

	707 goto out;

	708 }

	709

	710 continue;

	711 }

	712

	713 // Repeating more than one byte at

	714 // distance of rep0.

	715 rc_update_1(coder->is_rep0_long[

	716 state][pos_state]);

	717

	718 } else {

	719 rc_update_1(coder->is_rep0[state]);

	720

	721 case SEQ_IS_REP1:

	722 // The distance is rep1, rep2 or rep3. Once

	723 // we find out which one of these three, it

	724 // is stored to rep0 and rep1, rep2 and rep3

	725 // are updated accordingly.

	726 rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {

	727 rc_update_0(coder->is_rep1[state]);

	728

	729 const uint32_t distance = rep1;

	730 rep1 = rep0;

	731 rep0 = distance;

	732

	733 } else {

	734 rc_update_1(coder->is_rep1[state]);

	735 case SEQ_IS_REP2:

	736 rc_if_0(coder->is_rep2[state],

	737 SEQ_IS_REP2) {

	738 rc_update_0(coder->is_rep2[

	739 state]);

	740

	741 const uint32_t distance = rep2;

	742 rep2 = rep1;

	743 rep1 = rep0;

	744 rep0 = distance;

	745

	746 } else {

	747 rc_update_1(coder->is_rep2[

	748 state]);

	749

	750 const uint32_t distance = rep3;

	751 rep3 = rep2;

	752 rep2 = rep1;

	753 rep1 = rep0;

	754 rep0 = distance;

	755 }

	756 }

	757 }

	758

	759 update_long_rep(state);

	760

	761 // Decode the length of the repeated match.

	762 len_decode(len, coder->rep_len_decoder,

	763 pos_state, SEQ_REP_LEN);

	764 }

	765

	766 /////////////////////////////////

	767 // Repeat from history buffer. //

	768 /////////////////////////////////

	769

	770 // The length is always between these limits. There is no way

	771 // to trigger the algorithm to set len outside this range.

	772 assert(len >= MATCH_LEN_MIN);

	773 assert(len <= MATCH_LEN_MAX);

	774

	775 case SEQ_COPY:

	776 // Repeat len bytes from distance of rep0.

	777 if (unlikely(dict_repeat(&dict, rep0, &len))) {

	778 coder->sequence = SEQ_COPY;

	779 goto out;

	780 }

	781 }

	782

	783 rc_normalize(SEQ_NORMALIZE);

	784 coder->sequence = SEQ_IS_MATCH;

	785

	786 out:

	787 // Save state

	788

	789 // NOTE: Must not copy dict.limit.

	790 dictptr->pos = dict.pos;

	791 dictptr->full = dict.full;

	792

	793 rc_from_local(coder->rc, *in_pos);

	794

	795 coder->state = state;

	796 coder->rep0 = rep0;

	797 coder->rep1 = rep1;

	798 coder->rep2 = rep2;

	799 coder->rep3 = rep3;

	800

	801 coder->probs = probs;

	802 coder->symbol = symbol;

	803 coder->limit = limit;

	804 coder->offset = offset;

	805 coder->len = len;

	806

	807 // Update the remaining amount of uncompressed data if uncompressed

	808 // size was known.

	809 if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {

	810 coder->uncompressed_size -= dict.pos - dict_start;

	811

	812 // Since there cannot be end of payload marker if the

	813 // uncompressed size was known, we check here if we

	814 // finished decoding.

	815 if (coder->uncompressed_size == 0 && ret == LZMA_OK

	816 && coder->sequence != SEQ_NORMALIZE)

	817 ret = coder->sequence == SEQ_IS_MATCH

	818 ? LZMA_STREAM_END : LZMA_DATA_ERROR;

	819 }

	820

	821 // We can do an additional check in the range decoder to catch some

	822 // corrupted files.

	823 if (ret == LZMA_STREAM_END) {

	824 if (!rc_is_finished(coder->rc))

	825 ret = LZMA_DATA_ERROR;

	826

	827 // Reset the range decoder so that it is ready to reinitialize

	828 // for a new LZMA2 chunk.

	829 rc_reset(coder->rc);

	830 }

	831

	832 return ret;

	833 }

	834

	835

	836

	837 static void

	838 lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size)

	839 {

	840 coder->uncompressed_size = uncompressed_size;

	841 }

	842

	843 /*

	844 extern void

	845 lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)

	846 {

	847 // This is hack.

	848 ((lzma_coder *)(coder))->uncompressed_size = uncompressed_size;

	849 }

	850 */

	851

	852 static void

	853 lzma_decoder_reset(lzma_coder coder, const void opt)

	854 {

	855 const lzma_options_lzma *options = opt;

	856

	857 // NOTE: We assume that lc/lp/pb are valid since they were

	858 // successfully decoded with lzma_lzma_decode_properties().

	859 // FIXME?

	860

	861 // Calculate pos_mask. We don't need pos_bits as is for anything.

	862 coder->pos_mask = (1U << options->pb) - 1;

	863

	864 // Initialize the literal decoder.

	865 literal_init(coder->literal, options->lc, options->lp);

	866

	867 coder->literal_context_bits = options->lc;

	868 coder->literal_pos_mask = (1U << options->lp) - 1;

	869

	870 // State

	871 coder->state = STATE_LIT_LIT;

	872 coder->rep0 = 0;

	873 coder->rep1 = 0;

	874 coder->rep2 = 0;

	875 coder->rep3 = 0;

	876 coder->pos_mask = (1U << options->pb) - 1;

	877

	878 // Range decoder

	879 rc_reset(coder->rc);

	880

	881 // Bit and bittree decoders

	882 for (uint32_t i = 0; i < STATES; ++i) {

	883 for (uint32_t j = 0; j <= coder->pos_mask; ++j) {

	884 bit_reset(coder->is_match[i][j]);

	885 bit_reset(coder->is_rep0_long[i][j]);

	886 }

	887

	888 bit_reset(coder->is_rep[i]);

	889 bit_reset(coder->is_rep0[i]);

	890 bit_reset(coder->is_rep1[i]);

	891 bit_reset(coder->is_rep2[i]);

	892 }

	893

	894 for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i)

	895 bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);

	896

	897 for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)

	898 bit_reset(coder->pos_special[i]);

	899

	900 bittree_reset(coder->pos_align, ALIGN_BITS);

	901

	902 // Len decoders (also bit/bittree)

	903 const uint32_t num_pos_states = 1U << options->pb;

	904 bit_reset(coder->match_len_decoder.choice);

	905 bit_reset(coder->match_len_decoder.choice2);

	906 bit_reset(coder->rep_len_decoder.choice);

	907 bit_reset(coder->rep_len_decoder.choice2);

	908

	909 for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {

	910 bittree_reset(coder->match_len_decoder.low[pos_state],

	911 LEN_LOW_BITS);

	912 bittree_reset(coder->match_len_decoder.mid[pos_state],

	913 LEN_MID_BITS);

	914

	915 bittree_reset(coder->rep_len_decoder.low[pos_state],

	916 LEN_LOW_BITS);

	917 bittree_reset(coder->rep_len_decoder.mid[pos_state],

	918 LEN_MID_BITS);

	919 }

	920

	921 bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);

	922 bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);

	923

	924 coder->sequence = SEQ_IS_MATCH;

	925 coder->probs = NULL;

	926 coder->symbol = 0;

	927 coder->limit = 0;

	928 coder->offset = 0;

	929 coder->len = 0;

	930

	931 return;

	932 }

	933

	934

	935 extern lzma_ret

	936 lzma_lzma_decoder_create(lzma_lz_decoder lz, lzma_allocator allocator,

	937 const void opt, lzma_lz_options lz_options)

	938 {

	939 if (lz->coder == NULL) {

	940 lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);

	941 if (lz->coder == NULL)

	942 return LZMA_MEM_ERROR;

	943

	944 lz->code = &lzma_decode;

	945 lz->reset = &lzma_decoder_reset;

	946 lz->set_uncompressed = &lzma_decoder_uncompressed;

	947 }

	948

	949 // All dictionary sizes are OK here. LZ decoder will take care of

	950 // the special cases.

	951 const lzma_options_lzma *options = opt;

	952 lz_options->dict_size = options->dict_size;

	953 lz_options->preset_dict = options->preset_dict;

	954 lz_options->preset_dict_size = options->preset_dict_size;

	955

	956 return LZMA_OK;

	957 }

	958

	959

	960 /// Allocate and initialize LZMA decoder. This is used only via LZ

	961 /// initialization (lzma_lzma_decoder_init() passes function pointer to

	962 /// the LZ initialization).

	963 static lzma_ret

	964 lzma_decoder_init(lzma_lz_decoder lz, lzma_allocator allocator,

	965 const void options, lzma_lz_options lz_options)

	966 {

	967 if (!is_lclppb_valid(options))

	968 return LZMA_PROG_ERROR;

	969

	970 return_if_error(lzma_lzma_decoder_create(

	971 lz, allocator, options, lz_options));

	972

	973 lzma_decoder_reset(lz->coder, options);

	974 lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);

	975

	976 return LZMA_OK;

	977 }

	978

	979

	980 extern lzma_ret

	981 lzma_lzma_decoder_init(lzma_next_coder next, lzma_allocator allocator,

	982 const lzma_filter_info *filters)

	983 {

	984 // LZMA can only be the last filter in the chain. This is enforced

	985 // by the raw_decoder initialization.

	986 assert(filters[1].init == NULL);

	987

	988 return lzma_lz_decoder_init(next, allocator, filters,

	989 &lzma_decoder_init);

	990 }

	991

	992

	993 extern bool

	994 lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)

	995 {

	996 if (byte > (4 * 5 + 4) * 9 + 8)

	997 return true;

	998

	999 // See the file format specification to understand this.

	1000 options->pb = byte / (9 * 5);

	1001 byte -= options->pb * 9 * 5;

	1002 options->lp = byte / 9;

	1003 options->lc = byte - options->lp * 9;

	1004

	1005 return options->lc + options->lp > LZMA_LCLP_MAX;

	1006 }

	1007

	1008

	1009 extern uint64_t

	1010 lzma_lzma_decoder_memusage_nocheck(const void *options)

	1011 {

	1012 const lzma_options_lzma *const opt = options;

	1013 return sizeof(lzma_coder) + lzma_lz_decoder_memusage(opt->dict_size);

	1014 }

	1015

	1016

	1017 extern uint64_t

	1018 lzma_lzma_decoder_memusage(const void *options)

	1019 {

	1020 if (!is_lclppb_valid(options))

	1021 return UINT64_MAX;

	1022

	1023 return lzma_lzma_decoder_memusage_nocheck(options);

	1024 }

	1025

	1026

	1027 extern lzma_ret

	1028 lzma_lzma_props_decode(void *options, lzma_allocator allocator,

	1029 const uint8_t *props, size_t props_size)

	1030 {

	1031 if (props_size != 5)

	1032 return LZMA_OPTIONS_ERROR;

	1033

	1034 lzma_options_lzma *opt

	1035 = lzma_alloc(sizeof(lzma_options_lzma), allocator);

	1036 if (opt == NULL)

	1037 return LZMA_MEM_ERROR;

	1038

	1039 if (lzma_lzma_lclppb_decode(opt, props[0]))

	1040 goto error;

	1041

	1042 // All dictionary sizes are accepted, including zero. LZ decoder

	1043 // will automatically use a dictionary at least a few KiB even if

	1044 // a smaller dictionary is requested.

	1045 opt->dict_size = unaligned_read32le(props + 1);

	1046

	1047 opt->preset_dict = NULL;

	1048 opt->preset_dict_size = 0;

	1049

	1050 *options = opt;

	1051

	1052 return LZMA_OK;

	1053

	1054 error:

	1055 lzma_free(opt, allocator);

	1056 return LZMA_OPTIONS_ERROR;

	1057 }

OLD	NEW

« no previous file with comments | « xz/src/liblzma/lzma/lzma_decoder.h ('k') | xz/src/liblzma/lzma/lzma_encoder.h » ('j') | no next file with comments »