xz/src/liblzma/lz/lz_encoder.h - Issue 2869016: Add an unpatched version of xz, XZ Utils, to /trunk/deps/third_party

Side by Side Diff: xz/src/liblzma/lz/lz_encoder.h

Issue 2869016: Add an unpatched version of xz, XZ Utils, to /trunk/deps/third_party (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/

Patch Set: Created 10 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 ///////////////////////////////////////////////////////////////////////////////

	2 //

	3 /// \file lz_encoder.h

	4 /// \brief LZ in window and match finder API

	5 ///

	6 // Authors: Igor Pavlov

	7 // Lasse Collin

	8 //

	9 // This file has been put into the public domain.

	10 // You can do whatever you want with this file.

	11 //

	12 ///////////////////////////////////////////////////////////////////////////////

	13

	14 #ifndef LZMA_LZ_ENCODER_H

	15 #define LZMA_LZ_ENCODER_H

	16

	17 #include "common.h"

	18

	19

	20 /// One length-distance pair found by the match finder. The LZ-based

	21 /// encoder keeps a table of these to hold the matches it is given.

	22 typedef struct {

	23 uint32_t len; // Length of the match

	24 uint32_t dist; // Distance of the match

	25 } lzma_match;

	26

	27

	28 typedef struct lzma_mf_s lzma_mf;

	29 struct lzma_mf_s {

	30 ///////////////

	31 // In Window //

	32 ///////////////

	33

	34 /// Pointer to buffer with data to be compressed

	35 uint8_t *buffer;

	36

	37 /// Total size of the allocated buffer (that is, including all

	38 /// the extra space)

	39 uint32_t size;

	40

	41 /// Number of bytes that must be kept available in our input history.

	42 /// That is, once keep_size_before bytes have been processed,

	43 /// buffer[read_pos - keep_size_before] is the oldest byte that

	44 /// must be available for reading.

	45 uint32_t keep_size_before;

	46

	47 /// Number of bytes that must be kept in buffer after read_pos.

	48 /// That is, read_pos <= write_pos - keep_size_after as long as

	49 /// action is LZMA_RUN; when action != LZMA_RUN, read_pos is allowed

	50 /// to reach write_pos so that the last bytes get encoded too.

	51 uint32_t keep_size_after;

	52

	53 /// Match finders store locations of matches using 32-bit integers.

	54 /// To avoid adjusting several megabytes of integers every time the

	55 /// input window is moved with move_window, we only adjust the

	56 /// offset of the buffer. Thus, buffer[value_in_hash_table - offset]

	57 /// is the byte pointed by value_in_hash_table.

	58 uint32_t offset;

	59

	60 /// buffer[read_pos] is the next byte to run through the match

	61 /// finder. This is incremented in the match finder once the byte

	62 /// has been processed.

	63 uint32_t read_pos;

	64

	65 /// Number of bytes that have been run through the match finder, but

	66 /// which haven't been encoded by the LZ-based encoder yet.

	67 uint32_t read_ahead;

	68

	69 /// As long as read_pos is less than read_limit, there is enough

	70 /// input available in buffer for at least one encoding loop.

	71 ///

	72 /// Because of the stateful API, read_limit may and will get greater

	73 /// than read_pos quite often. This is taken into account when

	74 /// calculating the value for keep_size_after.

	75 uint32_t read_limit;

	76

	77 /// buffer[write_pos] is the first byte that doesn't contain valid

	78 /// uncompressed data; that is, the next input byte will be copied

	79 /// to buffer[write_pos].

	80 uint32_t write_pos;

	81

	82 /// Number of bytes not hashed before read_pos. This is needed to

	83 /// restart the match finder after LZMA_SYNC_FLUSH.

	84 uint32_t pending;

	85

	86 //////////////////

	87 // Match Finder //

	88 //////////////////

	89

	90 /// Find matches. Returns the number of distance-length pairs written

	91 /// to the matches array. This is called only via lzma_mf_find().

	92 uint32_t (*find)(lzma_mf *mf, lzma_match *matches);

	93

	94 /// Skips num bytes. This is like find() but doesn't make the

	95 /// distance-length pairs available, thus being a little faster.

	96 /// This is called only via mf_skip().

	97 void (*skip)(lzma_mf *mf, uint32_t num);

	98

	99 uint32_t *hash;

	100 uint32_t *son;

	101 uint32_t cyclic_pos;

	102 uint32_t cyclic_size; // Must be dictionary size + 1.

	103 uint32_t hash_mask;

	104

	105 /// Maximum number of loops in the match finder

	106 uint32_t depth;

	107

	108 /// Maximum length of a match that the match finder will try to find.

	109 uint32_t nice_len;

	110

	111 /// Maximum length of a match supported by the LZ-based encoder.

	112 /// If the longest match found by the match finder is nice_len,

	113 /// mf_find() tries to expand it up to match_len_max bytes.

	114 uint32_t match_len_max;

	115

	116 /// When running out of input, binary tree match finders need to know

	117 /// if it is due to flushing or finishing. The action is used also

	118 /// by the LZ-based encoders themselves.

	119 lzma_action action;

	120

	121 /// Number of elements in hash[]

	122 uint32_t hash_size_sum;

	123

	124 /// Number of elements in son[]

	125 uint32_t sons_count;

	126 };

	127

	128

	129 typedef struct {

	130 /// Extra amount of data to keep available before the "actual"

	131 /// dictionary.

	132 size_t before_size;

	133

	134 /// Size of the history buffer

	135 size_t dict_size;

	136

	137 /// Extra amount of data to keep available after the "actual"

	138 /// dictionary.

	139 size_t after_size;

	140

	141 /// Maximum length of a match that the LZ-based encoder can accept.

	142 /// This is used to extend matches of length nice_len to the

	143 /// maximum possible length.

	144 size_t match_len_max;

	145

	146 /// Match finder will search matches up to this length.

	147 /// This must be less than or equal to match_len_max.

	148 size_t nice_len;

	149

	150 /// Type of the match finder to use

	151 lzma_match_finder match_finder;

	152

	153 /// Maximum search depth

	154 uint32_t depth;

	155

	156 /// Preset dictionary data; presumably preset_dict_size bytes long (TODO: confirm and document)

	157 const uint8_t *preset_dict;

	158

	159 uint32_t preset_dict_size;

	160

	161 } lzma_lz_options;

	162

	163

	164 // The total usable buffer space at any moment outside the match finder:

	165 // before_size + dict_size + after_size + match_len_max

	166 //

	167 // In reality, there's some extra space allocated to keep the number of

	168 // memmove() calls reasonable. The bigger the dict_size is, the bigger

	169 // this extra buffer will be since with bigger dictionaries memmove() would

	170 // also take longer.

	171 //

	172 // A single encoder loop in the LZ-based encoder may call the match finder

	173 // (mf_find() or mf_skip()) at most after_size times. In other words,

	174 // a single encoder loop may increment lzma_mf.read_pos at most after_size

	175 // times. Since matches are looked up to

	176 // lzma_mf.buffer[lzma_mf.read_pos + match_len_max - 1], the total

	177 // amount of extra buffer needed after dict_size becomes

	178 // after_size + match_len_max.

	179 //

	180 // before_size has two uses. The first one is to keep literals available

	181 // in cases when the LZ-based encoder has made some read ahead.

	182 // TODO: Maybe this could be changed by making the LZ-based encoders to

	183 // store the actual literals as they do with length-distance pairs.

	184 //

	185 // Algorithms such as LZMA2 first try to compress a chunk, and then check

	186 // if the encoded result is smaller than the uncompressed one. If the chunk

	187 // was uncompressible, it is better to store it in uncompressed form in

	188 // the output stream. To do this, the whole uncompressed chunk has to be

	189 // still available in the history buffer. before_size achieves that.

	190

	191

	192 typedef struct {

	193 /// Data specific to the LZ-based encoder

	194 lzma_coder *coder;

	195

	196 /// Function to encode from *dict to out[]

	197 lzma_ret (*code)(lzma_coder *restrict coder,

	198 lzma_mf *restrict mf, uint8_t *restrict out,

	199 size_t *restrict out_pos, size_t out_size);

	200

	201 /// Free allocated resources

	202 void (*end)(lzma_coder *coder, lzma_allocator *allocator);

	203

	204 /// Update the options in the middle of the encoding.

	205 lzma_ret (*options_update)(lzma_coder *coder,

	206 const lzma_filter *filter);

	207

	208 } lzma_lz_encoder;

	209

	210

	211 // Basic steps:

	212 // 1. Input gets copied into the dictionary.

	213 // 2. Data in dictionary gets run through the match finder byte by byte.

	214 // 3. The literals and matches are encoded using e.g. LZMA.

	215 //

	216 // The bytes that have been run through the match finder, but not encoded yet,

	217 // are called `read ahead'.

	218

	219

	220 /// Get a pointer to buffer[read_pos], the first byte not yet run through the match finder

	221 static inline const uint8_t *

	222 mf_ptr(const lzma_mf *mf)

	223 {

	224 return mf->buffer + mf->read_pos;

	225 }

	226

	227

	228 /// Get the number of bytes that haven't been run through the match finder yet (write_pos - read_pos).

	229 static inline uint32_t

	230 mf_avail(const lzma_mf *mf)

	231 {

	232 return mf->write_pos - mf->read_pos;

	233 }

	234

	235

	236 /// Get the number of bytes that haven't been encoded yet (some of these

	237 /// bytes may already have been run through the match finder, though).

	238 static inline uint32_t

	239 mf_unencoded(const lzma_mf *mf)

	240 {

	241 return mf->write_pos - mf->read_pos + mf->read_ahead;

	242 }

	243

	244

	245 /// Calculate the absolute offset from the beginning of the most recent

	246 /// dictionary reset. Only the lowest four bits are important, so there's no

	247 /// problem that we don't know the 64-bit size of the data encoded so far.

	248 ///

	249 /// NOTE: When moving the input window, we need to do it so that the lowest

	250 /// bits of mf->read_pos are not modified to keep this function working

	251 /// as intended.

	252 static inline uint32_t

	253 mf_position(const lzma_mf *mf)

	254 {

	255 return mf->read_pos - mf->read_ahead; // read position minus the bytes read ahead but not yet encoded

	256 }

	257

	258

	259 /// Since every other helper here begins with mf_, alias lzma_mf_find() as mf_find() too.

	260 #define mf_find lzma_mf_find

	261

	262

	263 /// Skip the given number of bytes. This is used when a good match was found.

	264 /// For example, if mf_find() finds a match that is 200 bytes long, the first byte

	265 /// of that match was already consumed by mf_find(), and the remaining 199 bytes

	266 /// have to be skipped with mf_skip(mf, 199). A zero amount is a no-op.

	267 static inline void

	268 mf_skip(lzma_mf *mf, uint32_t amount)

	269 {

	270 if (amount != 0) {

	271 mf->skip(mf, amount);

	272 mf->read_ahead += amount; // skipped bytes are added to the read-ahead count

	273 }

	274 }

	275

	276

	277 /// Copies at most *left bytes from the history buffer (starting *left bytes before read_pos)

	278 /// to out[], limited by the space left in out[]; *out_pos and *left are updated by the amount copied. This is needed by LZMA2 to encode uncompressed chunks.

	279 static inline void

	280 mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size,

	281 size_t *left)

	282 {

	283 const size_t out_avail = out_size - *out_pos;

	284 const size_t copy_size = my_min(out_avail, *left);

	285

	286 assert(mf->read_ahead == 0);

	287 assert(mf->read_pos >= *left);

	288

	289 memcpy(out + *out_pos, mf->buffer + mf->read_pos - *left,

	290 copy_size);

	291

	292 *out_pos += copy_size;

	293 *left -= copy_size;

	294 return;

	295 }

	296

	297

	298 extern lzma_ret lzma_lz_encoder_init(

	299 lzma_next_coder *next, lzma_allocator *allocator,

	300 const lzma_filter_info *filters,

	301 lzma_ret (*lz_init)(lzma_lz_encoder *lz,

	302 lzma_allocator *allocator, const void *options,

	303 lzma_lz_options *lz_options));

	304

	305

	306 extern uint64_t lzma_lz_encoder_memusage(const lzma_lz_options *lz_options);

	307

	308

	309 // These are only for LZ encoder's internal use.

	310 extern uint32_t lzma_mf_find(

	311 lzma_mf *mf, uint32_t *count, lzma_match *matches);

	312

	313 extern uint32_t lzma_mf_hc3_find(lzma_mf *dict, lzma_match *matches);

	314 extern void lzma_mf_hc3_skip(lzma_mf *dict, uint32_t amount);

	315

	316 extern uint32_t lzma_mf_hc4_find(lzma_mf *dict, lzma_match *matches);

	317 extern void lzma_mf_hc4_skip(lzma_mf *dict, uint32_t amount);

	318

	319 extern uint32_t lzma_mf_bt2_find(lzma_mf *dict, lzma_match *matches);

	320 extern void lzma_mf_bt2_skip(lzma_mf *dict, uint32_t amount);

	321

	322 extern uint32_t lzma_mf_bt3_find(lzma_mf *dict, lzma_match *matches);

	323 extern void lzma_mf_bt3_skip(lzma_mf *dict, uint32_t amount);

	324

	325 extern uint32_t lzma_mf_bt4_find(lzma_mf *dict, lzma_match *matches);

	326 extern void lzma_mf_bt4_skip(lzma_mf *dict, uint32_t amount);

	327

	328 #endif

OLD	NEW

« no previous file with comments | « xz/src/liblzma/lz/lz_decoder.c ('k') | xz/src/liblzma/lz/lz_encoder.c » ('j') | no next file with comments »