OLD | NEW |
(Empty) | |
| 1 /////////////////////////////////////////////////////////////////////////////// |
| 2 // |
| 3 /// \file index_decoder.c |
| 4 /// \brief Decodes the Index field |
| 5 // |
| 6 // Author: Lasse Collin |
| 7 // |
| 8 // This file has been put into the public domain. |
| 9 // You can do whatever you want with this file. |
| 10 // |
| 11 /////////////////////////////////////////////////////////////////////////////// |
| 12 |
| 13 #include "index.h" |
| 14 #include "check.h" |
| 15 |
| 16 |
| 17 struct lzma_coder_s { |
| 18 enum { |
| 19 SEQ_INDICATOR, |
| 20 SEQ_COUNT, |
| 21 SEQ_MEMUSAGE, |
| 22 SEQ_UNPADDED, |
| 23 SEQ_UNCOMPRESSED, |
| 24 SEQ_PADDING_INIT, |
| 25 SEQ_PADDING, |
| 26 SEQ_CRC32, |
| 27 } sequence; |
| 28 |
| 29 /// Memory usage limit |
| 30 uint64_t memlimit; |
| 31 |
| 32 /// Target Index |
| 33 lzma_index *index; |
| 34 |
| 35 /// Pointer give by the application, which is set after |
| 36 /// successful decoding. |
| 37 lzma_index **index_ptr; |
| 38 |
| 39 /// Number of Records left to decode. |
| 40 lzma_vli count; |
| 41 |
| 42 /// The most recent Unpadded Size field |
| 43 lzma_vli unpadded_size; |
| 44 |
| 45 /// The most recent Uncompressed Size field |
| 46 lzma_vli uncompressed_size; |
| 47 |
| 48 /// Position in integers |
| 49 size_t pos; |
| 50 |
| 51 /// CRC32 of the List of Records field |
| 52 uint32_t crc32; |
| 53 }; |
| 54 |
| 55 |
| 56 static lzma_ret |
| 57 index_decode(lzma_coder *coder, lzma_allocator *allocator, |
| 58 const uint8_t *restrict in, size_t *restrict in_pos, |
| 59 size_t in_size, uint8_t *restrict out lzma_attribute((unused)), |
| 60 size_t *restrict out_pos lzma_attribute((unused)), |
| 61 size_t out_size lzma_attribute((unused)), |
| 62 lzma_action action lzma_attribute((unused))) |
| 63 { |
| 64 // Similar optimization as in index_encoder.c |
| 65 const size_t in_start = *in_pos; |
| 66 lzma_ret ret = LZMA_OK; |
| 67 |
| 68 while (*in_pos < in_size) |
| 69 switch (coder->sequence) { |
| 70 case SEQ_INDICATOR: |
| 71 // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or |
| 72 // LZMA_FORMAT_ERROR, because a typical usage case for Index |
| 73 // decoder is when parsing the Stream backwards. If seeking |
| 74 // backward from the Stream Footer gives us something that |
| 75 // doesn't begin with Index Indicator, the file is considered |
| 76 // corrupt, not "programming error" or "unrecognized file |
| 77 // format". One could argue that the application should |
| 78 // verify the Index Indicator before trying to decode the |
| 79 // Index, but well, I suppose it is simpler this way. |
| 80 if (in[(*in_pos)++] != 0x00) |
| 81 return LZMA_DATA_ERROR; |
| 82 |
| 83 coder->sequence = SEQ_COUNT; |
| 84 break; |
| 85 |
| 86 case SEQ_COUNT: |
| 87 ret = lzma_vli_decode(&coder->count, &coder->pos, |
| 88 in, in_pos, in_size); |
| 89 if (ret != LZMA_STREAM_END) |
| 90 goto out; |
| 91 |
| 92 coder->pos = 0; |
| 93 coder->sequence = SEQ_MEMUSAGE; |
| 94 |
| 95 // Fall through |
| 96 |
| 97 case SEQ_MEMUSAGE: |
| 98 if (lzma_index_memusage(1, coder->count) > coder->memlimit) { |
| 99 ret = LZMA_MEMLIMIT_ERROR; |
| 100 goto out; |
| 101 } |
| 102 |
| 103 // Tell the Index handling code how many Records this |
| 104 // Index has to allow it to allocate memory more efficiently. |
| 105 lzma_index_prealloc(coder->index, coder->count); |
| 106 |
| 107 ret = LZMA_OK; |
| 108 coder->sequence = coder->count == 0 |
| 109 ? SEQ_PADDING_INIT : SEQ_UNPADDED; |
| 110 break; |
| 111 |
| 112 case SEQ_UNPADDED: |
| 113 case SEQ_UNCOMPRESSED: { |
| 114 lzma_vli *size = coder->sequence == SEQ_UNPADDED |
| 115 ? &coder->unpadded_size |
| 116 : &coder->uncompressed_size; |
| 117 |
| 118 ret = lzma_vli_decode(size, &coder->pos, |
| 119 in, in_pos, in_size); |
| 120 if (ret != LZMA_STREAM_END) |
| 121 goto out; |
| 122 |
| 123 ret = LZMA_OK; |
| 124 coder->pos = 0; |
| 125 |
| 126 if (coder->sequence == SEQ_UNPADDED) { |
| 127 // Validate that encoded Unpadded Size isn't too small |
| 128 // or too big. |
| 129 if (coder->unpadded_size < UNPADDED_SIZE_MIN |
| 130 || coder->unpadded_size |
| 131 > UNPADDED_SIZE_MAX) |
| 132 return LZMA_DATA_ERROR; |
| 133 |
| 134 coder->sequence = SEQ_UNCOMPRESSED; |
| 135 } else { |
| 136 // Add the decoded Record to the Index. |
| 137 return_if_error(lzma_index_append( |
| 138 coder->index, allocator, |
| 139 coder->unpadded_size, |
| 140 coder->uncompressed_size)); |
| 141 |
| 142 // Check if this was the last Record. |
| 143 coder->sequence = --coder->count == 0 |
| 144 ? SEQ_PADDING_INIT |
| 145 : SEQ_UNPADDED; |
| 146 } |
| 147 |
| 148 break; |
| 149 } |
| 150 |
| 151 case SEQ_PADDING_INIT: |
| 152 coder->pos = lzma_index_padding_size(coder->index); |
| 153 coder->sequence = SEQ_PADDING; |
| 154 |
| 155 // Fall through |
| 156 |
| 157 case SEQ_PADDING: |
| 158 if (coder->pos > 0) { |
| 159 --coder->pos; |
| 160 if (in[(*in_pos)++] != 0x00) |
| 161 return LZMA_DATA_ERROR; |
| 162 |
| 163 break; |
| 164 } |
| 165 |
| 166 // Finish the CRC32 calculation. |
| 167 coder->crc32 = lzma_crc32(in + in_start, |
| 168 *in_pos - in_start, coder->crc32); |
| 169 |
| 170 coder->sequence = SEQ_CRC32; |
| 171 |
| 172 // Fall through |
| 173 |
| 174 case SEQ_CRC32: |
| 175 do { |
| 176 if (*in_pos == in_size) |
| 177 return LZMA_OK; |
| 178 |
| 179 if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) |
| 180 != in[(*in_pos)++]) |
| 181 return LZMA_DATA_ERROR; |
| 182 |
| 183 } while (++coder->pos < 4); |
| 184 |
| 185 // Decoding was successful, now we can let the application |
| 186 // see the decoded Index. |
| 187 *coder->index_ptr = coder->index; |
| 188 |
| 189 // Make index NULL so we don't free it unintentionally. |
| 190 coder->index = NULL; |
| 191 |
| 192 return LZMA_STREAM_END; |
| 193 |
| 194 default: |
| 195 assert(0); |
| 196 return LZMA_PROG_ERROR; |
| 197 } |
| 198 |
| 199 out: |
| 200 // Update the CRC32, |
| 201 coder->crc32 = lzma_crc32(in + in_start, |
| 202 *in_pos - in_start, coder->crc32); |
| 203 |
| 204 return ret; |
| 205 } |
| 206 |
| 207 |
| 208 static void |
| 209 index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) |
| 210 { |
| 211 lzma_index_end(coder->index, allocator); |
| 212 lzma_free(coder, allocator); |
| 213 return; |
| 214 } |
| 215 |
| 216 |
| 217 static lzma_ret |
| 218 index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, |
| 219 uint64_t *old_memlimit, uint64_t new_memlimit) |
| 220 { |
| 221 *memusage = lzma_index_memusage(1, coder->count); |
| 222 *old_memlimit = coder->memlimit; |
| 223 |
| 224 if (new_memlimit != 0) { |
| 225 if (new_memlimit < *memusage) |
| 226 return LZMA_MEMLIMIT_ERROR; |
| 227 |
| 228 coder->memlimit = new_memlimit; |
| 229 } |
| 230 |
| 231 return LZMA_OK; |
| 232 } |
| 233 |
| 234 |
| 235 static lzma_ret |
| 236 index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, |
| 237 lzma_index **i, uint64_t memlimit) |
| 238 { |
| 239 // Remember the pointer given by the application. We will set it |
| 240 // to point to the decoded Index only if decoding is successful. |
| 241 // Before that, keep it NULL so that applications can always safely |
| 242 // pass it to lzma_index_end() no matter did decoding succeed or not. |
| 243 coder->index_ptr = i; |
| 244 *i = NULL; |
| 245 |
| 246 // We always allocate a new lzma_index. |
| 247 coder->index = lzma_index_init(allocator); |
| 248 if (coder->index == NULL) |
| 249 return LZMA_MEM_ERROR; |
| 250 |
| 251 // Initialize the rest. |
| 252 coder->sequence = SEQ_INDICATOR; |
| 253 coder->memlimit = memlimit; |
| 254 coder->count = 0; // Needs to be initialized due to _memconfig(). |
| 255 coder->pos = 0; |
| 256 coder->crc32 = 0; |
| 257 |
| 258 return LZMA_OK; |
| 259 } |
| 260 |
| 261 |
| 262 static lzma_ret |
| 263 index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, |
| 264 lzma_index **i, uint64_t memlimit) |
| 265 { |
| 266 lzma_next_coder_init(&index_decoder_init, next, allocator); |
| 267 |
| 268 if (i == NULL || memlimit == 0) |
| 269 return LZMA_PROG_ERROR; |
| 270 |
| 271 if (next->coder == NULL) { |
| 272 next->coder = lzma_alloc(sizeof(lzma_coder), allocator); |
| 273 if (next->coder == NULL) |
| 274 return LZMA_MEM_ERROR; |
| 275 |
| 276 next->code = &index_decode; |
| 277 next->end = &index_decoder_end; |
| 278 next->memconfig = &index_decoder_memconfig; |
| 279 next->coder->index = NULL; |
| 280 } else { |
| 281 lzma_index_end(next->coder->index, allocator); |
| 282 } |
| 283 |
| 284 return index_decoder_reset(next->coder, allocator, i, memlimit); |
| 285 } |
| 286 |
| 287 |
| 288 extern LZMA_API(lzma_ret) |
| 289 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) |
| 290 { |
| 291 lzma_next_strm_init(index_decoder_init, strm, i, memlimit); |
| 292 |
| 293 strm->internal->supported_actions[LZMA_RUN] = true; |
| 294 |
| 295 return LZMA_OK; |
| 296 } |
| 297 |
| 298 |
| 299 extern LZMA_API(lzma_ret) |
| 300 lzma_index_buffer_decode( |
| 301 lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator, |
| 302 const uint8_t *in, size_t *in_pos, size_t in_size) |
| 303 { |
| 304 // Sanity checks |
| 305 if (i == NULL || memlimit == NULL |
| 306 || in == NULL || in_pos == NULL || *in_pos > in_size) |
| 307 return LZMA_PROG_ERROR; |
| 308 |
| 309 // Initialize the decoder. |
| 310 lzma_coder coder; |
| 311 return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); |
| 312 |
| 313 // Store the input start position so that we can restore it in case |
| 314 // of an error. |
| 315 const size_t in_start = *in_pos; |
| 316 |
| 317 // Do the actual decoding. |
| 318 lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, |
| 319 NULL, NULL, 0, LZMA_RUN); |
| 320 |
| 321 if (ret == LZMA_STREAM_END) { |
| 322 ret = LZMA_OK; |
| 323 } else { |
| 324 // Something went wrong, free the Index structure and restore |
| 325 // the input position. |
| 326 lzma_index_end(coder.index, allocator); |
| 327 *in_pos = in_start; |
| 328 |
| 329 if (ret == LZMA_OK) { |
| 330 // The input is truncated or otherwise corrupt. |
| 331 // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR |
| 332 // like lzma_vli_decode() does in single-call mode. |
| 333 ret = LZMA_DATA_ERROR; |
| 334 |
| 335 } else if (ret == LZMA_MEMLIMIT_ERROR) { |
| 336 // Tell the caller how much memory would have |
| 337 // been needed. |
| 338 *memlimit = lzma_index_memusage(1, coder.count); |
| 339 } |
| 340 } |
| 341 |
| 342 return ret; |
| 343 } |
OLD | NEW |