| OLD | NEW |
| (Empty) |
| 1 // Copyright 2008 Google Inc. | |
| 2 // Author: Lincoln Smith | |
| 3 // | |
| 4 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 // you may not use this file except in compliance with the License. | |
| 6 // You may obtain a copy of the License at | |
| 7 // | |
| 8 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 9 // | |
| 10 // Unless required by applicable law or agreed to in writing, software | |
| 11 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 13 // See the License for the specific language governing permissions and | |
| 14 // limitations under the License. | |
| 15 | |
| 16 #ifndef OPEN_VCDIFF_HEADERPARSER_H_ | |
| 17 #define OPEN_VCDIFF_HEADERPARSER_H_ | |
| 18 | |
| 19 #include <config.h> | |
| 20 #include <stdint.h> // int32_t, uint32_t | |
| 21 #include <cstddef> // NULL | |
| 22 #include "checksum.h" // VCDChecksum | |
| 23 #include "vcdiff_defs.h" // VCDiffResult | |
| 24 | |
| 25 namespace open_vcdiff { | |
| 26 | |
// Describes a contiguous memory buffer by its start and end pointers, plus a
// position pointer that separates the part of the buffer that has already
// been parsed from the part that remains.  The class only stores pointers
// into the buffer; it never copies or frees the bytes they refer to.
//
// Because no virtual destructor is defined for ParseableChunk, a pointer to
// a child class of ParseableChunk must be destroyed using its specific type,
// rather than as a ParseableChunk*.
class ParseableChunk {
 public:
  // Creates a chunk covering the data_size bytes that begin at data_start.
  // The parsing position starts out at data_start.
  ParseableChunk(const char* data_start, size_t data_size)
      : start_(data_start),
        end_(data_start + data_size),
        position_(data_start) { }

  // The position just after the last byte of the buffer.
  const char* End() const { return end_; }

  // The number of bytes remaining to be parsed.  This is not necessarily the
  // same as the initial size of the buffer; it shrinks as the parsing
  // position moves forward.
  size_t UnparsedSize() const {
    return static_cast<size_t>(end_ - position_);
  }

  // The number of bytes that have already been parsed.
  size_t ParsedSize() const {
    return static_cast<size_t>(position_ - start_);
  }

  // True if there are no bytes left to parse.
  bool Empty() const { return 0 == UnparsedSize(); }

  // The start of the data remaining to be parsed.
  const char* UnparsedData() const { return position_; }

  // Returns the address of the internal position pointer (not the position
  // itself).  A caller that writes through the returned pointer moves the
  // parsing position directly; it is then the caller's responsibility to
  // keep start_ <= position_ <= end_.
  const char** UnparsedDataAddr() { return &position_; }

  // Moves the parsing position forward by number_of_bytes.
  void Advance(size_t number_of_bytes);

  // Jumps the parsing position to a new location.
  void SetPosition(const char* position);

  // Jumps the parsing position to the end of the data chunk.
  void Finish() {
    position_ = end_;
  }

  // Jumps the parsing position so that there are now number_of_bytes
  // bytes left to parse.  This number should be smaller than the size of data
  // to be parsed before the function was called.
  void FinishExcept(size_t number_of_bytes);

  // Points this object at a different buffer of data_size bytes beginning at
  // data_start, and resets the parsing position to the start of that buffer.
  void SetDataBuffer(const char* data_start, size_t data_size) {
    start_ = data_start;
    end_ = data_start + data_size;
    position_ = start_;
  }

 private:
  const char* start_;
  const char* end_;

  // The current parsing position within the data chunk.
  // Must always respect start_ <= position_ <= end_.
  const char* position_;

  // Making these private avoids implicit copy constructor & assignment operator
  ParseableChunk(const ParseableChunk&);
  void operator=(const ParseableChunk&);
};
| 96 | |
| 97 // Represents one of the three sections in the delta window, as described in | |
| 98 // RFC section 4.3: | |
| 99 // * Data section for ADDs and RUNs | |
| 100 // * Instructions and sizes section | |
| 101 // * Addresses section for COPYs | |
| 102 // When using the interleaved format, data and addresses are pulled from the | |
| 103 // instructions and sizes section rather than being stored in separate sections. | |
| 104 // For that reason, this class allows one DeltaWindowSection to be based on | |
| 105 // another, such that the same position pointer is shared by both sections; | |
| 106 // i.e., UnparsedDataAddr() returns the same value for both objects. | |
| 107 // To achieve this end, one extra level of indirection (a pointer to a | |
| 108 // ParseableChunk object) is added. | |
| 109 class DeltaWindowSection { | |
| 110 public: | |
| 111 DeltaWindowSection() : parseable_chunk_(NULL), owned_(true) { } | |
| 112 | |
| 113 ~DeltaWindowSection() { | |
| 114 FreeChunk(); | |
| 115 } | |
| 116 | |
| 117 void Init(const char* data_start, size_t data_size) { | |
| 118 if (owned_ && parseable_chunk_) { | |
| 119 // Reuse the already-allocated ParseableChunk object. | |
| 120 parseable_chunk_->SetDataBuffer(data_start, data_size); | |
| 121 } else { | |
| 122 parseable_chunk_ = new ParseableChunk(data_start, data_size); | |
| 123 owned_ = true; | |
| 124 } | |
| 125 } | |
| 126 | |
| 127 void Init(DeltaWindowSection* original) { | |
| 128 FreeChunk(); | |
| 129 parseable_chunk_ = original->parseable_chunk_; | |
| 130 owned_ = false; | |
| 131 } | |
| 132 | |
| 133 void Invalidate() { FreeChunk(); } | |
| 134 | |
| 135 bool IsOwned() const { return owned_; } | |
| 136 | |
| 137 // The following functions just pass their arguments to the underlying | |
| 138 // ParseableChunk object. | |
| 139 | |
| 140 const char* End() const { | |
| 141 return parseable_chunk_->End(); | |
| 142 } | |
| 143 | |
| 144 size_t UnparsedSize() const { | |
| 145 return parseable_chunk_->UnparsedSize(); | |
| 146 } | |
| 147 | |
| 148 size_t ParsedSize() const { | |
| 149 return parseable_chunk_->ParsedSize(); | |
| 150 } | |
| 151 | |
| 152 bool Empty() const { | |
| 153 return parseable_chunk_->Empty(); | |
| 154 } | |
| 155 | |
| 156 const char* UnparsedData() const { | |
| 157 return parseable_chunk_->UnparsedData(); | |
| 158 } | |
| 159 | |
| 160 const char** UnparsedDataAddr() { | |
| 161 return parseable_chunk_->UnparsedDataAddr(); | |
| 162 } | |
| 163 | |
| 164 void Advance(size_t number_of_bytes) { | |
| 165 return parseable_chunk_->Advance(number_of_bytes); | |
| 166 } | |
| 167 private: | |
| 168 void FreeChunk() { | |
| 169 if (owned_) { | |
| 170 delete parseable_chunk_; | |
| 171 } | |
| 172 parseable_chunk_ = NULL; | |
| 173 } | |
| 174 | |
| 175 // Will be NULL until Init() has been called. If owned_ is true, this will | |
| 176 // point to a ParseableChunk object that has been allocated with "new" and | |
| 177 // must be deleted by this DeltaWindowSection object. If owned_ is false, | |
| 178 // this points at the parseable_chunk_ owned by a different DeltaWindowSection | |
| 179 // object. In this case, it is important to free the DeltaWindowSection which | |
| 180 // does not own the ParseableChunk before (or simultaneously to) freeing the | |
| 181 // DeltaWindowSection that owns it, or else deleted memory may be accessed. | |
| 182 ParseableChunk* parseable_chunk_; | |
| 183 bool owned_; | |
| 184 | |
| 185 // Making these private avoids implicit copy constructor & assignment operator | |
| 186 DeltaWindowSection(const DeltaWindowSection&); | |
| 187 void operator=(const DeltaWindowSection&); | |
| 188 }; | |
| 189 | |
| 190 // Used to parse the bytes and Varints that make up the delta file header | |
| 191 // or delta window header. | |
| 192 class VCDiffHeaderParser { | |
| 193 public: | |
| 194 // The maximum allowable size of a target window. This restricts the amount | |
| 195 // of memory that can be allocated by the decoder. A maliciously formulated | |
| 196 // delta file can create a target window of any arbitrary size, so the | |
| 197 // decoder needs to be sure that it can allocate this much memory using | |
| 198 // std::string::reserve(). | |
| 199 // | |
| 200 static const size_t kMaxTargetWindowSize = 1 << 26; // 64 MB | |
| 201 | |
| 202 // header_start should be the start of the header to be parsed; | |
| 203 // data_end is the position just after the last byte of available data | |
| 204 // (which may extend far past the end of the header.) | |
| 205 VCDiffHeaderParser(const char* header_start, const char* data_end); | |
| 206 | |
| 207 // One of these functions should be called for each element of the header. | |
| 208 // variable_description is a description of the value that we are attempting | |
| 209 // to parse, and will only be used to create descriptive error messages. | |
| 210 // If the function returns true, then the element was parsed successfully | |
| 211 // and its value has been placed in *value. If the function returns false, | |
| 212 // then *value is unchanged, and GetResult() can be called to return the | |
| 213 // reason that the element could not be parsed, which will be either | |
| 214 // RESULT_ERROR (an error occurred), or RESULT_END_OF_DATA (the limit data_end | |
| 215 // was reached before the end of the element to be parsed.) Once one of these | |
| 216 // functions has returned false, further calls to any of the Parse... | |
| 217 // functions will also return false without performing any additional actions. | |
| 218 // Typical usage is as follows: | |
| 219 // int32_t segment_length = 0; | |
| 220 // if (!header_parser.ParseInt32("segment length", &segment_length)) { | |
| 221 // return header_parser.GetResult(); | |
| 222 // } | |
| 223 // | |
| 224 // The following example takes advantage of the fact that calling a Parse... | |
| 225 // function after an error or end-of-data condition is legal and does nothing. | |
| 226 // It can thus parse more than one element in a row and check the status | |
| 227 // afterwards. If the first call to ParseInt32() fails, the second will have | |
| 228 // no effect: | |
| 229 // | |
| 230 // int32_t segment_length = 0, segment_position = 0; | |
| 231 // header_parser.ParseInt32("segment length", &segment_length)); | |
| 232 // header_parser.ParseInt32("segment position", &segment_position)); | |
| 233 // if (RESULT_SUCCESS != header_parser.GetResult()) { | |
| 234 // return header_parser.GetResult(); | |
| 235 // } | |
| 236 // | |
| 237 bool ParseByte(unsigned char* value); | |
| 238 bool ParseInt32(const char* variable_description, int32_t* value); | |
| 239 bool ParseUInt32(const char* variable_description, uint32_t* value); | |
| 240 bool ParseChecksum(const char* variable_description, VCDChecksum* value); | |
| 241 bool ParseSize(const char* variable_description, size_t* value); | |
| 242 | |
| 243 // Parses the first three elements of the delta window header: | |
| 244 // | |
| 245 // Win_Indicator - byte | |
| 246 // [Source segment size] - integer (VarintBE format) | |
| 247 // [Source segment position] - integer (VarintBE format) | |
| 248 // | |
| 249 // Returns true if the values were parsed successfully and the values were | |
| 250 // found to be acceptable. Returns false otherwise, in which case | |
| 251 // GetResult() can be called to return the reason that the two values | |
| 252 // could not be validated. This will be either RESULT_ERROR (an error | |
| 253 // occurred and was logged), or RESULT_END_OF_DATA (the limit data_end was | |
| 254 // reached before the end of the values to be parsed.) If return value is | |
| 255 // true, then *win_indicator, *source_segment_length, and | |
| 256 // *source_segment_position are populated with the parsed values. Otherwise, | |
| 257 // the values of these output arguments are undefined. | |
| 258 // | |
| 259 // dictionary_size: The size of the dictionary (source) file. Used to | |
| 260 // validate the limits of source_segment_length and | |
| 261 // source_segment_position if the source segment is taken from the | |
| 262 // dictionary (i.e., if the parsed *win_indicator equals VCD_SOURCE.) | |
| 263 // decoded_target_size: The size of the target data that has been decoded | |
| 264 // so far, including all target windows. Used to validate the limits of | |
| 265 // source_segment_length and source_segment_position if the source segment | |
| 266 // is taken from the target (i.e., if the parsed *win_indicator equals | |
| 267 // VCD_TARGET.) | |
| 268 // win_indicator (output): Points to a single unsigned char (not an array) | |
| 269 // that will receive the parsed value of Win_Indicator. | |
| 270 // source_segment_length (output): The parsed length of the source segment. | |
| 271 // source_segment_position (output): The parsed zero-based index in the | |
| 272 // source/target file from which the source segment is to be taken. | |
| 273 bool ParseWinIndicatorAndSourceSegment(size_t dictionary_size, | |
| 274 size_t decoded_target_size, | |
| 275 unsigned char* win_indicator, | |
| 276 size_t* source_segment_length, | |
| 277 size_t* source_segment_position); | |
| 278 | |
| 279 // Parses the following two elements of the delta window header: | |
| 280 // | |
| 281 // Length of the delta encoding - integer (VarintBE format) | |
| 282 // Size of the target window - integer (VarintBE format) | |
| 283 // | |
| 284 // Return conditions and values are the same as for | |
| 285 // ParseWinIndicatorAndSourceSegment(), above. | |
| 286 // | |
| 287 bool ParseWindowLengths(size_t* target_window_length); | |
| 288 | |
| 289 // May only be called after ParseWindowLengths() has returned RESULT_SUCCESS. | |
| 290 // Returns a pointer to the end of the delta window (which might not point to | |
| 291 // a valid memory location if there is insufficient input data.) | |
| 292 // | |
| 293 const char* EndOfDeltaWindow() const; | |
| 294 | |
| 295 // Parses the following element of the delta window header: | |
| 296 // | |
| 297 // Delta_Indicator - byte | |
| 298 // | |
| 299 // Because none of the bits in Delta_Indicator are used by this implementation | |
| 300 // of VCDIFF, this function does not have an output argument to return the | |
| 301 // value of that field. It may return RESULT_SUCCESS, RESULT_ERROR, or | |
| 302 // RESULT_END_OF_DATA as with the other Parse...() functions. | |
| 303 // | |
| 304 bool ParseDeltaIndicator(); | |
| 305 | |
| 306 // Parses the following 3 elements of the delta window header: | |
| 307 // | |
| 308 // Length of data for ADDs and RUNs - integer (VarintBE format) | |
| 309 // Length of instructions and sizes - integer (VarintBE format) | |
| 310 // Length of addresses for COPYs - integer (VarintBE format) | |
| 311 // | |
| 312 // If has_checksum is true, it also looks for the following element: | |
| 313 // | |
| 314 // Adler32 checksum - unsigned 32-bit integer (VarintBE format) | |
| 315 // | |
| 316 // Return conditions and values are the same as for | |
| 317 // ParseWinIndicatorAndSourceSegment(), above. | |
| 318 // | |
| 319 bool ParseSectionLengths(bool has_checksum, | |
| 320 size_t* add_and_run_data_length, | |
| 321 size_t* instructions_and_sizes_length, | |
| 322 size_t* addresses_length, | |
| 323 VCDChecksum* checksum); | |
| 324 | |
| 325 // If one of the Parse... functions returned false, this function | |
| 326 // can be used to find the result code (RESULT_ERROR or RESULT_END_OF_DATA) | |
| 327 // describing the reason for the most recent parse failure. If none of the | |
| 328 // Parse... functions has returned false, returns RESULT_SUCCESS. | |
| 329 VCDiffResult GetResult() const { | |
| 330 return return_code_; | |
| 331 } | |
| 332 | |
| 333 // The following functions just pass their arguments to the underlying | |
| 334 // ParseableChunk object. | |
| 335 | |
| 336 const char* End() const { | |
| 337 return parseable_chunk_.End(); | |
| 338 } | |
| 339 | |
| 340 size_t UnparsedSize() const { | |
| 341 return parseable_chunk_.UnparsedSize(); | |
| 342 } | |
| 343 | |
| 344 size_t ParsedSize() const { | |
| 345 return parseable_chunk_.ParsedSize(); | |
| 346 } | |
| 347 | |
| 348 const char* UnparsedData() const { | |
| 349 return parseable_chunk_.UnparsedData(); | |
| 350 } | |
| 351 | |
| 352 private: | |
| 353 // Parses two variable-length integers representing the source segment length | |
| 354 // and source segment position (== offset.) Checks whether the source segment | |
| 355 // length and position would cause it to exceed the size of the source file or | |
| 356 // target file. Returns true if the values were parsed successfully and the | |
| 357 // values were found to be acceptable. Returns false otherwise, in which case | |
| 358 // GetResult() can be called to return the reason that the two values could | |
| 359 // not be validated, which will be either RESULT_ERROR (an error occurred and | |
| 360 // was logged), or RESULT_END_OF_DATA (the limit data_end was reached before | |
| 361 // the end of the integers to be parsed.) | |
| 362 // from_size: The requested size of the source segment. | |
| 363 // from_boundary_name: A NULL-terminated string naming the end of the | |
| 364 // source or target file, used in error messages. | |
| 365 // from_name: A NULL-terminated string naming the source or target file, | |
| 366 // also used in error messages. | |
| 367 // source_segment_length (output): The parsed length of the source segment. | |
| 368 // source_segment_position (output): The parsed zero-based index in the | |
| 369 // source/target file from which the source segment is to be taken. | |
| 370 // | |
| 371 bool ParseSourceSegmentLengthAndPosition(size_t from_size, | |
| 372 const char* from_boundary_name, | |
| 373 const char* from_name, | |
| 374 size_t* source_segment_length, | |
| 375 size_t* source_segment_position); | |
| 376 | |
| 377 ParseableChunk parseable_chunk_; | |
| 378 | |
| 379 // Contains the result code of the last Parse...() operation that failed | |
| 380 // (RESULT_ERROR or RESULT_END_OF_DATA). If no Parse...() method has been | |
| 381 // called, or if all calls to Parse...() were successful, then this contains | |
| 382 // RESULT_SUCCESS. | |
| 383 VCDiffResult return_code_; | |
| 384 | |
| 385 // Will be zero until ParseWindowLengths() has been called. After | |
| 386 // ParseWindowLengths() has been called successfully, this contains the | |
| 387 // parsed length of the delta encoding. | |
| 388 size_t delta_encoding_length_; | |
| 389 | |
| 390 // Will be NULL until ParseWindowLengths() has been called. After | |
| 391 // ParseWindowLengths() has been called successfully, this points to the | |
| 392 // beginning of the section of the current window titled "The delta encoding" | |
| 393 // in the RFC, i.e., to the position just after the length of the delta | |
| 394 // encoding. | |
| 395 const char* delta_encoding_start_; | |
| 396 | |
| 397 // Making these private avoids implicit copy constructor & assignment operator | |
| 398 VCDiffHeaderParser(const VCDiffHeaderParser&); | |
| 399 void operator=(const VCDiffHeaderParser&); | |
| 400 }; | |
| 401 | |
| 402 } // namespace open_vcdiff | |
| 403 | |
| 404 #endif // OPEN_VCDIFF_HEADERPARSER_H_ | |
| OLD | NEW |