OLD | NEW |
(Empty) | |
///////////////////////////////////////////////////////////////////////////////
//
/// \file       lzma2_encoder.c
/// \brief      LZMA2 encoder
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
| 13 |
| 14 #include "lz_encoder.h" |
| 15 #include "lzma_encoder.h" |
| 16 #include "fastpos.h" |
| 17 #include "lzma2_encoder.h" |
| 18 |
| 19 |
| 20 struct lzma_coder_s { |
| 21 enum { |
| 22 SEQ_INIT, |
| 23 SEQ_LZMA_ENCODE, |
| 24 SEQ_LZMA_COPY, |
| 25 SEQ_UNCOMPRESSED_HEADER, |
| 26 SEQ_UNCOMPRESSED_COPY, |
| 27 } sequence; |
| 28 |
| 29 /// LZMA encoder |
| 30 lzma_coder *lzma; |
| 31 |
| 32 /// LZMA options currently in use. |
| 33 lzma_options_lzma opt_cur; |
| 34 |
| 35 bool need_properties; |
| 36 bool need_state_reset; |
| 37 bool need_dictionary_reset; |
| 38 |
| 39 /// Uncompressed size of a chunk |
| 40 size_t uncompressed_size; |
| 41 |
| 42 /// Compressed size of a chunk (excluding headers); this is also used |
| 43 /// to indicate the end of buf[] in SEQ_LZMA_COPY. |
| 44 size_t compressed_size; |
| 45 |
| 46 /// Read position in buf[] |
| 47 size_t buf_pos; |
| 48 |
| 49 /// Buffer to hold the chunk header and LZMA compressed data |
| 50 uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX]; |
| 51 }; |
| 52 |
| 53 |
| 54 static void |
| 55 lzma2_header_lzma(lzma_coder *coder) |
| 56 { |
| 57 assert(coder->uncompressed_size > 0); |
| 58 assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); |
| 59 assert(coder->compressed_size > 0); |
| 60 assert(coder->compressed_size <= LZMA2_CHUNK_MAX); |
| 61 |
| 62 size_t pos; |
| 63 |
| 64 if (coder->need_properties) { |
| 65 pos = 0; |
| 66 |
| 67 if (coder->need_dictionary_reset) |
| 68 coder->buf[pos] = 0x80 + (3 << 5); |
| 69 else |
| 70 coder->buf[pos] = 0x80 + (2 << 5); |
| 71 } else { |
| 72 pos = 1; |
| 73 |
| 74 if (coder->need_state_reset) |
| 75 coder->buf[pos] = 0x80 + (1 << 5); |
| 76 else |
| 77 coder->buf[pos] = 0x80; |
| 78 } |
| 79 |
| 80 // Set the start position for copying. |
| 81 coder->buf_pos = pos; |
| 82 |
| 83 // Uncompressed size |
| 84 size_t size = coder->uncompressed_size - 1; |
| 85 coder->buf[pos++] += size >> 16; |
| 86 coder->buf[pos++] = (size >> 8) & 0xFF; |
| 87 coder->buf[pos++] = size & 0xFF; |
| 88 |
| 89 // Compressed size |
| 90 size = coder->compressed_size - 1; |
| 91 coder->buf[pos++] = size >> 8; |
| 92 coder->buf[pos++] = size & 0xFF; |
| 93 |
| 94 // Properties, if needed |
| 95 if (coder->need_properties) |
| 96 lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos); |
| 97 |
| 98 coder->need_properties = false; |
| 99 coder->need_state_reset = false; |
| 100 coder->need_dictionary_reset = false; |
| 101 |
| 102 // The copying code uses coder->compressed_size to indicate the end |
| 103 // of coder->buf[], so we need add the maximum size of the header here. |
| 104 coder->compressed_size += LZMA2_HEADER_MAX; |
| 105 |
| 106 return; |
| 107 } |
| 108 |
| 109 |
| 110 static void |
| 111 lzma2_header_uncompressed(lzma_coder *coder) |
| 112 { |
| 113 assert(coder->uncompressed_size > 0); |
| 114 assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX); |
| 115 |
| 116 // If this is the first chunk, we need to include dictionary |
| 117 // reset indicator. |
| 118 if (coder->need_dictionary_reset) |
| 119 coder->buf[0] = 1; |
| 120 else |
| 121 coder->buf[0] = 2; |
| 122 |
| 123 coder->need_dictionary_reset = false; |
| 124 |
| 125 // "Compressed" size |
| 126 coder->buf[1] = (coder->uncompressed_size - 1) >> 8; |
| 127 coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF; |
| 128 |
| 129 // Set the start position for copying. |
| 130 coder->buf_pos = 0; |
| 131 return; |
| 132 } |
| 133 |
| 134 |
| 135 static lzma_ret |
| 136 lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, |
| 137 uint8_t *restrict out, size_t *restrict out_pos, |
| 138 size_t out_size) |
| 139 { |
| 140 while (*out_pos < out_size) |
| 141 switch (coder->sequence) { |
| 142 case SEQ_INIT: |
| 143 // If there's no input left and we are flushing or finishing, |
| 144 // don't start a new chunk. |
| 145 if (mf_unencoded(mf) == 0) { |
| 146 // Write end of payload marker if finishing. |
| 147 if (mf->action == LZMA_FINISH) |
| 148 out[(*out_pos)++] = 0; |
| 149 |
| 150 return mf->action == LZMA_RUN |
| 151 ? LZMA_OK : LZMA_STREAM_END; |
| 152 } |
| 153 |
| 154 if (coder->need_state_reset) |
| 155 return_if_error(lzma_lzma_encoder_reset( |
| 156 coder->lzma, &coder->opt_cur)); |
| 157 |
| 158 coder->uncompressed_size = 0; |
| 159 coder->compressed_size = 0; |
| 160 coder->sequence = SEQ_LZMA_ENCODE; |
| 161 |
| 162 // Fall through |
| 163 |
| 164 case SEQ_LZMA_ENCODE: { |
| 165 // Calculate how much more uncompressed data this chunk |
| 166 // could accept. |
| 167 const uint32_t left = LZMA2_UNCOMPRESSED_MAX |
| 168 - coder->uncompressed_size; |
| 169 uint32_t limit; |
| 170 |
| 171 if (left < mf->match_len_max) { |
| 172 // Must flush immediately since the next LZMA symbol |
| 173 // could make the uncompressed size of the chunk too |
| 174 // big. |
| 175 limit = 0; |
| 176 } else { |
| 177 // Calculate maximum read_limit that is OK from point |
| 178 // of view of LZMA2 chunk size. |
| 179 limit = mf->read_pos - mf->read_ahead |
| 180 + left - mf->match_len_max; |
| 181 } |
| 182 |
| 183 // Save the start position so that we can update |
| 184 // coder->uncompressed_size. |
| 185 const uint32_t read_start = mf->read_pos - mf->read_ahead; |
| 186 |
| 187 // Call the LZMA encoder until the chunk is finished. |
| 188 const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf, |
| 189 coder->buf + LZMA2_HEADER_MAX, |
| 190 &coder->compressed_size, |
| 191 LZMA2_CHUNK_MAX, limit); |
| 192 |
| 193 coder->uncompressed_size += mf->read_pos - mf->read_ahead |
| 194 - read_start; |
| 195 |
| 196 assert(coder->compressed_size <= LZMA2_CHUNK_MAX); |
| 197 assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); |
| 198 |
| 199 if (ret != LZMA_STREAM_END) |
| 200 return LZMA_OK; |
| 201 |
| 202 // See if the chunk compressed. If it didn't, we encode it |
| 203 // as uncompressed chunk. This saves a few bytes of space |
| 204 // and makes decoding faster. |
| 205 if (coder->compressed_size >= coder->uncompressed_size) { |
| 206 coder->uncompressed_size += mf->read_ahead; |
| 207 assert(coder->uncompressed_size |
| 208 <= LZMA2_UNCOMPRESSED_MAX); |
| 209 mf->read_ahead = 0; |
| 210 lzma2_header_uncompressed(coder); |
| 211 coder->need_state_reset = true; |
| 212 coder->sequence = SEQ_UNCOMPRESSED_HEADER; |
| 213 break; |
| 214 } |
| 215 |
| 216 // The chunk did compress at least by one byte, so we store |
| 217 // the chunk as LZMA. |
| 218 lzma2_header_lzma(coder); |
| 219 |
| 220 coder->sequence = SEQ_LZMA_COPY; |
| 221 } |
| 222 |
| 223 // Fall through |
| 224 |
| 225 case SEQ_LZMA_COPY: |
| 226 // Copy the compressed chunk along its headers to the |
| 227 // output buffer. |
| 228 lzma_bufcpy(coder->buf, &coder->buf_pos, |
| 229 coder->compressed_size, |
| 230 out, out_pos, out_size); |
| 231 if (coder->buf_pos != coder->compressed_size) |
| 232 return LZMA_OK; |
| 233 |
| 234 coder->sequence = SEQ_INIT; |
| 235 break; |
| 236 |
| 237 case SEQ_UNCOMPRESSED_HEADER: |
| 238 // Copy the three-byte header to indicate uncompressed chunk. |
| 239 lzma_bufcpy(coder->buf, &coder->buf_pos, |
| 240 LZMA2_HEADER_UNCOMPRESSED, |
| 241 out, out_pos, out_size); |
| 242 if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED) |
| 243 return LZMA_OK; |
| 244 |
| 245 coder->sequence = SEQ_UNCOMPRESSED_COPY; |
| 246 |
| 247 // Fall through |
| 248 |
| 249 case SEQ_UNCOMPRESSED_COPY: |
| 250 // Copy the uncompressed data as is from the dictionary |
| 251 // to the output buffer. |
| 252 mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size); |
| 253 if (coder->uncompressed_size != 0) |
| 254 return LZMA_OK; |
| 255 |
| 256 coder->sequence = SEQ_INIT; |
| 257 break; |
| 258 } |
| 259 |
| 260 return LZMA_OK; |
| 261 } |
| 262 |
| 263 |
| 264 static void |
| 265 lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator) |
| 266 { |
| 267 lzma_free(coder->lzma, allocator); |
| 268 lzma_free(coder, allocator); |
| 269 return; |
| 270 } |
| 271 |
| 272 |
| 273 static lzma_ret |
| 274 lzma2_encoder_options_update(lzma_coder *coder, const lzma_filter *filter) |
| 275 { |
| 276 // New options can be set only when there is no incomplete chunk. |
| 277 // This is the case at the beginning of the raw stream and right |
| 278 // after LZMA_SYNC_FLUSH. |
| 279 if (filter->options == NULL || coder->sequence != SEQ_INIT) |
| 280 return LZMA_PROG_ERROR; |
| 281 |
| 282 // Look if there are new options. At least for now, |
| 283 // only lc/lp/pb can be changed. |
| 284 const lzma_options_lzma *opt = filter->options; |
| 285 if (coder->opt_cur.lc != opt->lc || coder->opt_cur.lp != opt->lp |
| 286 || coder->opt_cur.pb != opt->pb) { |
| 287 // Validate the options. |
| 288 if (opt->lc > LZMA_LCLP_MAX || opt->lp > LZMA_LCLP_MAX |
| 289 || opt->lc + opt->lp > LZMA_LCLP_MAX |
| 290 || opt->pb > LZMA_PB_MAX) |
| 291 return LZMA_OPTIONS_ERROR; |
| 292 |
| 293 // The new options will be used when the encoder starts |
| 294 // a new LZMA2 chunk. |
| 295 coder->opt_cur.lc = opt->lc; |
| 296 coder->opt_cur.lp = opt->lp; |
| 297 coder->opt_cur.pb = opt->pb; |
| 298 coder->need_properties = true; |
| 299 coder->need_state_reset = true; |
| 300 } |
| 301 |
| 302 return LZMA_OK; |
| 303 } |
| 304 |
| 305 |
| 306 static lzma_ret |
| 307 lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator, |
| 308 const void *options, lzma_lz_options *lz_options) |
| 309 { |
| 310 if (options == NULL) |
| 311 return LZMA_PROG_ERROR; |
| 312 |
| 313 if (lz->coder == NULL) { |
| 314 lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); |
| 315 if (lz->coder == NULL) |
| 316 return LZMA_MEM_ERROR; |
| 317 |
| 318 lz->code = &lzma2_encode; |
| 319 lz->end = &lzma2_encoder_end; |
| 320 lz->options_update = &lzma2_encoder_options_update; |
| 321 |
| 322 lz->coder->lzma = NULL; |
| 323 } |
| 324 |
| 325 lz->coder->opt_cur = *(const lzma_options_lzma *)(options); |
| 326 |
| 327 lz->coder->sequence = SEQ_INIT; |
| 328 lz->coder->need_properties = true; |
| 329 lz->coder->need_state_reset = false; |
| 330 lz->coder->need_dictionary_reset |
| 331 = lz->coder->opt_cur.preset_dict == NULL |
| 332 || lz->coder->opt_cur.preset_dict_size == 0; |
| 333 |
| 334 // Initialize LZMA encoder |
| 335 return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator, |
| 336 &lz->coder->opt_cur, lz_options)); |
| 337 |
| 338 // Make sure that we will always have enough history available in |
| 339 // case we need to use uncompressed chunks. They are used when the |
| 340 // compressed size of a chunk is not smaller than the uncompressed |
| 341 // size, so we need to have at least LZMA2_COMPRESSED_MAX bytes |
| 342 // history available. |
| 343 if (lz_options->before_size + lz_options->dict_size < LZMA2_CHUNK_MAX) |
| 344 lz_options->before_size |
| 345 = LZMA2_CHUNK_MAX - lz_options->dict_size; |
| 346 |
| 347 return LZMA_OK; |
| 348 } |
| 349 |
| 350 |
| 351 extern lzma_ret |
| 352 lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, |
| 353 const lzma_filter_info *filters) |
| 354 { |
| 355 return lzma_lz_encoder_init( |
| 356 next, allocator, filters, &lzma2_encoder_init); |
| 357 } |
| 358 |
| 359 |
| 360 extern uint64_t |
| 361 lzma_lzma2_encoder_memusage(const void *options) |
| 362 { |
| 363 const uint64_t lzma_mem = lzma_lzma_encoder_memusage(options); |
| 364 if (lzma_mem == UINT64_MAX) |
| 365 return UINT64_MAX; |
| 366 |
| 367 return sizeof(lzma_coder) + lzma_mem; |
| 368 } |
| 369 |
| 370 |
| 371 extern lzma_ret |
| 372 lzma_lzma2_props_encode(const void *options, uint8_t *out) |
| 373 { |
| 374 const lzma_options_lzma *const opt = options; |
| 375 uint32_t d = my_max(opt->dict_size, LZMA_DICT_SIZE_MIN); |
| 376 |
| 377 // Round up to to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending |
| 378 // on which one is the next: |
| 379 --d; |
| 380 d |= d >> 2; |
| 381 d |= d >> 3; |
| 382 d |= d >> 4; |
| 383 d |= d >> 8; |
| 384 d |= d >> 16; |
| 385 |
| 386 // Get the highest two bits using the proper encoding: |
| 387 if (d == UINT32_MAX) |
| 388 out[0] = 40; |
| 389 else |
| 390 out[0] = get_pos_slot(d + 1) - 24; |
| 391 |
| 392 return LZMA_OK; |
| 393 } |
OLD | NEW |