///////////////////////////////////////////////////////////////////////////////
//
/// \file       index_hash.c
/// \brief      Validates Index by using a hash function
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "common.h"
#include "index.h"
#include "check.h"


typedef struct {
	/// Sum of the Block sizes (including Block Padding)
	lzma_vli blocks_size;

	/// Sum of the Uncompressed Size fields
	lzma_vli uncompressed_size;

	/// Number of Records
	lzma_vli count;

	/// Size of the List of Index Records as bytes
	lzma_vli index_list_size;

	/// Check calculated from Unpadded Sizes and Uncompressed Sizes.
	lzma_check_state check;

} lzma_index_hash_info;


struct lzma_index_hash_s {
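	/// Position in the Index decoding state machine. New Records can be
	/// added with lzma_index_hash_append() only while this is SEQ_BLOCK;
	/// lzma_index_hash_decode() advances through the remaining states.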
	enum {
		SEQ_BLOCK,
		SEQ_COUNT,
		SEQ_UNPADDED,
		SEQ_UNCOMPRESSED,
		SEQ_PADDING_INIT,
		SEQ_PADDING,
		SEQ_CRC32,
	} sequence;

	/// Information collected while decoding the actual Blocks.
	lzma_index_hash_info blocks;

	/// Information collected from the Index field.
	lzma_index_hash_info records;

	/// Number of Records not fully decoded
	lzma_vli remaining;

	/// Unpadded Size currently being read from an Index Record.
	lzma_vli unpadded_size;

	/// Uncompressed Size currently being read from an Index Record.
	lzma_vli uncompressed_size;

	/// Position in variable-length integers when decoding them from
	/// the List of Records.
	size_t pos;

	/// CRC32 of the Index
	uint32_t crc32;
};


extern LZMA_API(lzma_index_hash *)
lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator)
{
	if (index_hash == NULL) {
		index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
		if (index_hash == NULL)
			return NULL;
	}

	index_hash->sequence = SEQ_BLOCK;
	index_hash->blocks.blocks_size = 0;
	index_hash->blocks.uncompressed_size = 0;
	index_hash->blocks.count = 0;
	index_hash->blocks.index_list_size = 0;
	index_hash->records.blocks_size = 0;
	index_hash->records.uncompressed_size = 0;
	index_hash->records.count = 0;
	index_hash->records.index_list_size = 0;
	index_hash->unpadded_size = 0;
	index_hash->uncompressed_size = 0;
	index_hash->pos = 0;
	index_hash->crc32 = 0;

	// These cannot fail because LZMA_CHECK_BEST is known to be supported.
	(void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
	(void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);

	return index_hash;
}


extern LZMA_API(void)
lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator)
{
	lzma_free(index_hash, allocator);
	return;
}


extern LZMA_API(lzma_vli)
lzma_index_hash_size(const lzma_index_hash *index_hash)
{
	// Get the size of the Index from ->blocks instead of ->records for
	// cases where the application wants to know the Index Size before
	// decoding the Index.
	return index_size(index_hash->blocks.count,
			index_hash->blocks.index_list_size);
}


/// Updates the sizes and the hash without any validation.
static lzma_ret
hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
		lzma_vli uncompressed_size)
{
	info->blocks_size += vli_ceil4(unpadded_size);
	info->uncompressed_size += uncompressed_size;
	info->index_list_size += lzma_vli_size(unpadded_size)
			+ lzma_vli_size(uncompressed_size);
	++info->count;

	const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
	lzma_check_update(&info->check, LZMA_CHECK_BEST,
			(const uint8_t *)(sizes), sizeof(sizes));

	return LZMA_OK;
}


extern LZMA_API(lzma_ret)
lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
		lzma_vli uncompressed_size)
{
	// Validate the arguments.
	if (index_hash->sequence != SEQ_BLOCK
			|| unpadded_size < UNPADDED_SIZE_MIN
			|| unpadded_size > UNPADDED_SIZE_MAX
			|| uncompressed_size > LZMA_VLI_MAX)
		return LZMA_PROG_ERROR;

	// Update the hash.
	return_if_error(hash_append(&index_hash->blocks,
			unpadded_size, uncompressed_size));

	// Validate that the properties of index_hash->blocks are still
	// within the allowed limits.
	if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
			|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
			|| index_size(index_hash->blocks.count,
					index_hash->blocks.index_list_size)
				> LZMA_BACKWARD_SIZE_MAX
			|| index_stream_size(index_hash->blocks.blocks_size,
					index_hash->blocks.count,
					index_hash->blocks.index_list_size)
				> LZMA_VLI_MAX)
		return LZMA_DATA_ERROR;

	return LZMA_OK;
}


extern LZMA_API(lzma_ret)
lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
		size_t *in_pos, size_t in_size)
{
	// Catch an empty input buffer here, because unlike the Index encoder
	// and decoder, applications call this function directly instead of
	// via lzma_code(), which does the buffer checking.
	if (*in_pos >= in_size)
		return LZMA_BUF_ERROR;

	// NOTE: This function has many similarities to index_encode() and
	// index_decode() functions found in index_encoder.c and
	// index_decoder.c. See the comments especially in index_encoder.c.
	const size_t in_start = *in_pos;
	lzma_ret ret = LZMA_OK;

	while (*in_pos < in_size)
	switch (index_hash->sequence) {
	case SEQ_BLOCK:
		// Check that the Index Indicator is present.
		if (in[(*in_pos)++] != 0x00)
			return LZMA_DATA_ERROR;

		index_hash->sequence = SEQ_COUNT;
		break;

	case SEQ_COUNT: {
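		// Decode the Number of Records. It is stored in ->remaining
		// and counted down as the Records are decoded.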
		ret = lzma_vli_decode(&index_hash->remaining,
				&index_hash->pos, in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			goto out;

		// The count must match the count of the Blocks decoded.
		if (index_hash->remaining != index_hash->blocks.count)
			return LZMA_DATA_ERROR;

		ret = LZMA_OK;
		index_hash->pos = 0;

		// Handle the special case when there are no Blocks.
		index_hash->sequence = index_hash->remaining == 0
				? SEQ_PADDING_INIT : SEQ_UNPADDED;
		break;
	}

	case SEQ_UNPADDED:
	case SEQ_UNCOMPRESSED: {
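		// Both size fields of a Record are decoded with the same
		// code; size points to the field that the current state
		// is reading.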
		lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
				? &index_hash->unpadded_size
				: &index_hash->uncompressed_size;

		ret = lzma_vli_decode(size, &index_hash->pos,
				in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			goto out;

		ret = LZMA_OK;
		index_hash->pos = 0;

		if (index_hash->sequence == SEQ_UNPADDED) {
			if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
					|| index_hash->unpadded_size
						> UNPADDED_SIZE_MAX)
				return LZMA_DATA_ERROR;

			index_hash->sequence = SEQ_UNCOMPRESSED;
		} else {
			// Update the hash.
			return_if_error(hash_append(&index_hash->records,
					index_hash->unpadded_size,
					index_hash->uncompressed_size));

			// Verify that we don't go over the known sizes. Note
			// that this validation is simpler than the one used
			// in lzma_index_hash_append(), because here we know
			// that the values in index_hash->blocks are already
			// validated and we are fine as long as we don't
			// exceed them in index_hash->records.
			if (index_hash->blocks.blocks_size
					< index_hash->records.blocks_size
					|| index_hash->blocks.uncompressed_size
					< index_hash->records.uncompressed_size
					|| index_hash->blocks.index_list_size
					< index_hash->records.index_list_size)
				return LZMA_DATA_ERROR;

			// Check if this was the last Record.
			index_hash->sequence = --index_hash->remaining == 0
					? SEQ_PADDING_INIT : SEQ_UNPADDED;
		}

		break;
	}

	case SEQ_PADDING_INIT:
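		// Calculate the size of the Index Padding:
		// index_size_unpadded() gives the combined size of the Index
		// Indicator, Number of Records, and List of Records, and the
		// padding rounds that up to the next multiple of four bytes.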
		index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
				index_hash->records.count,
				index_hash->records.index_list_size)) & 3;
		index_hash->sequence = SEQ_PADDING;

	// Fall through

	case SEQ_PADDING:
		if (index_hash->pos > 0) {
			--index_hash->pos;
			if (in[(*in_pos)++] != 0x00)
				return LZMA_DATA_ERROR;

			break;
		}

		// Compare the sizes.
		if (index_hash->blocks.blocks_size
				!= index_hash->records.blocks_size
				|| index_hash->blocks.uncompressed_size
				!= index_hash->records.uncompressed_size
				|| index_hash->blocks.index_list_size
				!= index_hash->records.index_list_size)
			return LZMA_DATA_ERROR;

		// Finish the hashes and compare them.
		lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
		lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
		if (memcmp(index_hash->blocks.check.buffer.u8,
				index_hash->records.check.buffer.u8,
				lzma_check_size(LZMA_CHECK_BEST)) != 0)
			return LZMA_DATA_ERROR;

		// Finish the CRC32 calculation.
		index_hash->crc32 = lzma_crc32(in + in_start,
				*in_pos - in_start, index_hash->crc32);

		index_hash->sequence = SEQ_CRC32;

	// Fall through

	case SEQ_CRC32:
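		// Compare the CRC32 field stored in the Index, one byte at
		// a time in little endian order, against the CRC32 that was
		// calculated from the rest of the Index.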
		do {
			if (*in_pos == in_size)
				return LZMA_OK;

			if (((index_hash->crc32 >> (index_hash->pos * 8))
					& 0xFF) != in[(*in_pos)++])
				return LZMA_DATA_ERROR;

		} while (++index_hash->pos < 4);

		return LZMA_STREAM_END;

	default:
		assert(0);
		return LZMA_PROG_ERROR;
	}

out:
	// Update the CRC32.
	index_hash->crc32 = lzma_crc32(in + in_start,
			*in_pos - in_start, index_hash->crc32);

	return ret;
}
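

// The sketch below is not part of liblzma; it only illustrates the intended
// calling sequence of this API and is therefore kept out of the build with
// #if 0. The helper name validate_index() and the assumption that the whole
// Index field is already in index_buf are made up for the example. During
// Stream decoding, the application would first call lzma_index_hash_append()
// once per Block with that Block's Unpadded Size and Uncompressed Size.
#if 0
static lzma_ret
validate_index(lzma_index_hash *index_hash,
		const uint8_t *index_buf, size_t in_size)
{
	size_t in_pos = 0;

	// Feed the raw Index field to the validator.
	const lzma_ret ret = lzma_index_hash_decode(index_hash,
			index_buf, &in_pos, in_size);

	// LZMA_STREAM_END: the Index matches the appended Blocks.
	// LZMA_DATA_ERROR: a mismatch or an otherwise invalid Index.
	// LZMA_OK: the buffer ended before the whole Index was decoded;
	// with streamed input one would call again with more data.
	return ret;
}
#endif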