Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(181)

Side by Side Diff: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/headerparser.h

Issue 5203: Transition to pulling open-vcdiff from repository, instead of using snapshot... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 #ifndef OPEN_VCDIFF_HEADERPARSER_H_
17 #define OPEN_VCDIFF_HEADERPARSER_H_
18
19 #include <config.h>
20 #include <stdint.h> // int32_t, uint32_t
21 #include <cstddef> // NULL
22 #include "checksum.h" // VCDChecksum
23 #include "vcdiff_defs.h" // VCDiffResult
24
25 namespace open_vcdiff {
26
// A view over one contiguous block of input bytes, together with a cursor
// recording how far parsing has progressed.  Three pointers are kept: the
// first byte of the block, one past its last byte, and the current parsing
// position between them.  The bytes themselves are not copied; only the
// pointers are stored.
//
// The class declares no virtual destructor, so an object of a derived class
// must be destroyed through its own type and never through a
// ParseableChunk*.
class ParseableChunk {
 public:
  // Creates a chunk covering data_size bytes beginning at data_start, with
  // the parsing position at the first byte.
  ParseableChunk(const char* data_start, size_t data_size)
      : start_(data_start),
        end_(data_start + data_size),
        position_(data_start) { }

  // One past the last byte of the chunk.
  const char* End() const { return end_; }

  // How many bytes lie between the parsing position and End().  The value
  // shrinks as the position moves forward, so in general it differs from
  // the size the buffer was created with.
  size_t UnparsedSize() const {
    return static_cast<size_t>(end_ - position_);
  }

  // How many bytes lie between the start of the chunk and the parsing
  // position, i.e. the amount already consumed.
  size_t ParsedSize() const {
    return static_cast<size_t>(position_ - start_);
  }

  // True when the parsing position has reached End().
  bool Empty() const { return position_ == end_; }

  // The first byte that has not been parsed yet.
  const char* UnparsedData() const { return position_; }

  // The address of the internal position pointer itself (not of the data).
  // A caller may move the parsing position by writing through the returned
  // pointer; such writes are not checked against the chunk's limits here.
  const char** UnparsedDataAddr() { return &position_; }

  // Moves the parsing position forward by number_of_bytes.
  void Advance(size_t number_of_bytes);

  // Jumps the parsing position to a new location.
  void SetPosition(const char* position);

  // Marks the whole chunk as parsed by moving the position to End().
  void Finish() {
    position_ = end_;
  }

  // Jumps the parsing position so that exactly number_of_bytes bytes remain
  // to be parsed.  number_of_bytes should be smaller than the amount of
  // unparsed data present before the call.
  void FinishExcept(size_t number_of_bytes);

  // Re-targets the chunk at a different buffer of data_size bytes starting
  // at data_start, and rewinds the parsing position to its first byte.
  void SetDataBuffer(const char* data_start, size_t data_size) {
    start_ = data_start;
    position_ = data_start;
    end_ = data_start + data_size;
  }

 private:
  // First byte of the chunk, and one past its last byte.
  const char* start_;
  const char* end_;

  // Current parsing position.  Invariant: start_ <= position_ <= end_.
  const char* position_;

  // Declared private and left undefined so that chunks cannot be copied or
  // assigned.
  ParseableChunk(const ParseableChunk&);
  void operator=(const ParseableChunk&);
};
96
97 // Represents one of the three sections in the delta window, as described in
98 // RFC section 4.3:
99 // * Data section for ADDs and RUNs
100 // * Instructions and sizes section
101 // * Addresses section for COPYs
102 // When using the interleaved format, data and addresses are pulled from the
103 // instructions and sizes section rather than being stored in separate sections.
104 // For that reason, this class allows one DeltaWindowSection to be based on
105 // another, such that the same position pointer is shared by both sections;
106 // i.e., UnparsedDataAddr() returns the same value for both objects.
107 // To achieve this end, one extra level of indirection (a pointer to a
108 // ParseableChunk object) is added.
109 class DeltaWindowSection {
110 public:
111 DeltaWindowSection() : parseable_chunk_(NULL), owned_(true) { }
112
113 ~DeltaWindowSection() {
114 FreeChunk();
115 }
116
117 void Init(const char* data_start, size_t data_size) {
118 if (owned_ && parseable_chunk_) {
119 // Reuse the already-allocated ParseableChunk object.
120 parseable_chunk_->SetDataBuffer(data_start, data_size);
121 } else {
122 parseable_chunk_ = new ParseableChunk(data_start, data_size);
123 owned_ = true;
124 }
125 }
126
127 void Init(DeltaWindowSection* original) {
128 FreeChunk();
129 parseable_chunk_ = original->parseable_chunk_;
130 owned_ = false;
131 }
132
133 void Invalidate() { FreeChunk(); }
134
135 bool IsOwned() const { return owned_; }
136
137 // The following functions just pass their arguments to the underlying
138 // ParseableChunk object.
139
140 const char* End() const {
141 return parseable_chunk_->End();
142 }
143
144 size_t UnparsedSize() const {
145 return parseable_chunk_->UnparsedSize();
146 }
147
148 size_t ParsedSize() const {
149 return parseable_chunk_->ParsedSize();
150 }
151
152 bool Empty() const {
153 return parseable_chunk_->Empty();
154 }
155
156 const char* UnparsedData() const {
157 return parseable_chunk_->UnparsedData();
158 }
159
160 const char** UnparsedDataAddr() {
161 return parseable_chunk_->UnparsedDataAddr();
162 }
163
164 void Advance(size_t number_of_bytes) {
165 return parseable_chunk_->Advance(number_of_bytes);
166 }
167 private:
168 void FreeChunk() {
169 if (owned_) {
170 delete parseable_chunk_;
171 }
172 parseable_chunk_ = NULL;
173 }
174
175 // Will be NULL until Init() has been called. If owned_ is true, this will
176 // point to a ParseableChunk object that has been allocated with "new" and
177 // must be deleted by this DeltaWindowSection object. If owned_ is false,
178 // this points at the parseable_chunk_ owned by a different DeltaWindowSection
179 // object. In this case, it is important to free the DeltaWindowSection which
180 // does not own the ParseableChunk before (or simultaneously to) freeing the
181 // DeltaWindowSection that owns it, or else deleted memory may be accessed.
182 ParseableChunk* parseable_chunk_;
183 bool owned_;
184
185 // Making these private avoids implicit copy constructor & assignment operator
186 DeltaWindowSection(const DeltaWindowSection&);
187 void operator=(const DeltaWindowSection&);
188 };
189
190 // Used to parse the bytes and Varints that make up the delta file header
191 // or delta window header.
192 class VCDiffHeaderParser {
193 public:
194 // The maximum allowable size of a target window. This restricts the amount
195 // of memory that can be allocated by the decoder. A maliciously formulated
196 // delta file can create a target window of any arbitrary size, so the
197 // decoder needs to be sure that it can allocate this much memory using
198 // std::string::reserve().
199 //
200 static const size_t kMaxTargetWindowSize = 1 << 26; // 64 MB
201
202 // header_start should be the start of the header to be parsed;
203 // data_end is the position just after the last byte of available data
204 // (which may extend far past the end of the header.)
205 VCDiffHeaderParser(const char* header_start, const char* data_end);
206
207 // One of these functions should be called for each element of the header.
208 // variable_description is a description of the value that we are attempting
209 // to parse, and will only be used to create descriptive error messages.
210 // If the function returns true, then the element was parsed successfully
211 // and its value has been placed in *value. If the function returns false,
212 // then *value is unchanged, and GetResult() can be called to return the
213 // reason that the element could not be parsed, which will be either
214 // RESULT_ERROR (an error occurred), or RESULT_END_OF_DATA (the limit data_end
215 // was reached before the end of the element to be parsed.) Once one of these
216 // functions has returned false, further calls to any of the Parse...
217 // functions will also return false without performing any additional actions.
218 // Typical usage is as follows:
219 // int32_t segment_length = 0;
220 // if (!header_parser.ParseInt32("segment length", &segment_length)) {
221 // return header_parser.GetResult();
222 // }
223 //
224 // The following example takes advantage of the fact that calling a Parse...
225 // function after an error or end-of-data condition is legal and does nothing.
226 // It can thus parse more than one element in a row and check the status
227 // afterwards. If the first call to ParseInt32() fails, the second will have
228 // no effect:
229 //
230 // int32_t segment_length = 0, segment_position = 0;
231 // header_parser.ParseInt32("segment length", &segment_length));
232 // header_parser.ParseInt32("segment position", &segment_position));
233 // if (RESULT_SUCCESS != header_parser.GetResult()) {
234 // return header_parser.GetResult();
235 // }
236 //
237 bool ParseByte(unsigned char* value);
238 bool ParseInt32(const char* variable_description, int32_t* value);
239 bool ParseUInt32(const char* variable_description, uint32_t* value);
240 bool ParseChecksum(const char* variable_description, VCDChecksum* value);
241 bool ParseSize(const char* variable_description, size_t* value);
242
243 // Parses the first three elements of the delta window header:
244 //
245 // Win_Indicator - byte
246 // [Source segment size] - integer (VarintBE format)
247 // [Source segment position] - integer (VarintBE format)
248 //
249 // Returns true if the values were parsed successfully and the values were
250 // found to be acceptable. Returns false otherwise, in which case
251 // GetResult() can be called to return the reason that the two values
252 // could not be validated. This will be either RESULT_ERROR (an error
253 // occurred and was logged), or RESULT_END_OF_DATA (the limit data_end was
254 // reached before the end of the values to be parsed.) If return value is
255 // true, then *win_indicator, *source_segment_length, and
256 // *source_segment_position are populated with the parsed values. Otherwise,
257 // the values of these output arguments are undefined.
258 //
259 // dictionary_size: The size of the dictionary (source) file. Used to
260 // validate the limits of source_segment_length and
261 // source_segment_position if the source segment is taken from the
262 // dictionary (i.e., if the parsed *win_indicator equals VCD_SOURCE.)
263 // decoded_target_size: The size of the target data that has been decoded
264 // so far, including all target windows. Used to validate the limits of
265 // source_segment_length and source_segment_position if the source segment
266 // is taken from the target (i.e., if the parsed *win_indicator equals
267 // VCD_TARGET.)
268 // win_indicator (output): Points to a single unsigned char (not an array)
269 // that will receive the parsed value of Win_Indicator.
270 // source_segment_length (output): The parsed length of the source segment.
271 // source_segment_position (output): The parsed zero-based index in the
272 // source/target file from which the source segment is to be taken.
273 bool ParseWinIndicatorAndSourceSegment(size_t dictionary_size,
274 size_t decoded_target_size,
275 unsigned char* win_indicator,
276 size_t* source_segment_length,
277 size_t* source_segment_position);
278
279 // Parses the following two elements of the delta window header:
280 //
281 // Length of the delta encoding - integer (VarintBE format)
282 // Size of the target window - integer (VarintBE format)
283 //
284 // Return conditions and values are the same as for
285 // ParseWinIndicatorAndSourceSegment(), above.
286 //
287 bool ParseWindowLengths(size_t* target_window_length);
288
289 // May only be called after ParseWindowLengths() has returned RESULT_SUCCESS.
290 // Returns a pointer to the end of the delta window (which might not point to
291 // a valid memory location if there is insufficient input data.)
292 //
293 const char* EndOfDeltaWindow() const;
294
295 // Parses the following element of the delta window header:
296 //
297 // Delta_Indicator - byte
298 //
299 // Because none of the bits in Delta_Indicator are used by this implementation
300 // of VCDIFF, this function does not have an output argument to return the
301 // value of that field. It may return RESULT_SUCCESS, RESULT_ERROR, or
302 // RESULT_END_OF_DATA as with the other Parse...() functions.
303 //
304 bool ParseDeltaIndicator();
305
306 // Parses the following 3 elements of the delta window header:
307 //
308 // Length of data for ADDs and RUNs - integer (VarintBE format)
309 // Length of instructions and sizes - integer (VarintBE format)
310 // Length of addresses for COPYs - integer (VarintBE format)
311 //
312 // If has_checksum is true, it also looks for the following element:
313 //
314 // Adler32 checksum - unsigned 32-bit integer (VarintBE format)
315 //
316 // Return conditions and values are the same as for
317 // ParseWinIndicatorAndSourceSegment(), above.
318 //
319 bool ParseSectionLengths(bool has_checksum,
320 size_t* add_and_run_data_length,
321 size_t* instructions_and_sizes_length,
322 size_t* addresses_length,
323 VCDChecksum* checksum);
324
325 // If one of the Parse... functions returned false, this function
326 // can be used to find the result code (RESULT_ERROR or RESULT_END_OF_DATA)
327 // describing the reason for the most recent parse failure. If none of the
328 // Parse... functions has returned false, returns RESULT_SUCCESS.
329 VCDiffResult GetResult() const {
330 return return_code_;
331 }
332
333 // The following functions just pass their arguments to the underlying
334 // ParseableChunk object.
335
336 const char* End() const {
337 return parseable_chunk_.End();
338 }
339
340 size_t UnparsedSize() const {
341 return parseable_chunk_.UnparsedSize();
342 }
343
344 size_t ParsedSize() const {
345 return parseable_chunk_.ParsedSize();
346 }
347
348 const char* UnparsedData() const {
349 return parseable_chunk_.UnparsedData();
350 }
351
352 private:
353 // Parses two variable-length integers representing the source segment length
354 // and source segment position (== offset.) Checks whether the source segment
355 // length and position would cause it to exceed the size of the source file or
356 // target file. Returns true if the values were parsed successfully and the
357 // values were found to be acceptable. Returns false otherwise, in which case
358 // GetResult() can be called to return the reason that the two values could
359 // not be validated, which will be either RESULT_ERROR (an error occurred and
360 // was logged), or RESULT_END_OF_DATA (the limit data_end was reached before
361 // the end of the integers to be parsed.)
362 // from_size: The requested size of the source segment.
363 // from_boundary_name: A NULL-terminated string naming the end of the
364 // source or target file, used in error messages.
365 // from_name: A NULL-terminated string naming the source or target file,
366 // also used in error messages.
367 // source_segment_length (output): The parsed length of the source segment.
368 // source_segment_position (output): The parsed zero-based index in the
369 // source/target file from which the source segment is to be taken.
370 //
371 bool ParseSourceSegmentLengthAndPosition(size_t from_size,
372 const char* from_boundary_name,
373 const char* from_name,
374 size_t* source_segment_length,
375 size_t* source_segment_position);
376
377 ParseableChunk parseable_chunk_;
378
379 // Contains the result code of the last Parse...() operation that failed
380 // (RESULT_ERROR or RESULT_END_OF_DATA). If no Parse...() method has been
381 // called, or if all calls to Parse...() were successful, then this contains
382 // RESULT_SUCCESS.
383 VCDiffResult return_code_;
384
385 // Will be zero until ParseWindowLengths() has been called. After
386 // ParseWindowLengths() has been called successfully, this contains the
387 // parsed length of the delta encoding.
388 size_t delta_encoding_length_;
389
390 // Will be NULL until ParseWindowLengths() has been called. After
391 // ParseWindowLengths() has been called successfully, this points to the
392 // beginning of the section of the current window titled "The delta encoding"
393 // in the RFC, i.e., to the position just after the length of the delta
394 // encoding.
395 const char* delta_encoding_start_;
396
397 // Making these private avoids implicit copy constructor & assignment operator
398 VCDiffHeaderParser(const VCDiffHeaderParser&);
399 void operator=(const VCDiffHeaderParser&);
400 };
401
402 } // namespace open_vcdiff
403
404 #endif // OPEN_VCDIFF_HEADERPARSER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698