Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(423)

Side by Side Diff: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdiff_main.cc

Issue 5203: Transition to pulling open-vcdiff from repository, instead of using snapshot... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // A command-line interface to the open-vcdiff library.
17
18 #include <config.h>
19 #include <cassert>
20 #include <cerrno>
21 #include <cstdio>
22 #include <cstring> // strerror
23 #include <memory>
24 #include <string>
25 #include <vector>
26 #include "google/gflags.h"
27 #include "logging.h"
28 #include "google/vcdecoder.h"
29 #include "google/vcencoder.h"
30
31 using std::string;
32 using google::GetCommandLineFlagInfoOrDie;
33 using google::ShowUsageWithFlagsRestrict;
34
// Buffer sizing limits.  The buffer size bounds how large a target window
// can be, which in turn depends on how much memory can be allocated.
// kDefaultBufferSize is the default value of the --buffersize flag;
// kMaxBufferSize caps the buffer used for a named input file unless
// --buffersize asks for more.
static const size_t kDefaultBufferSize = 1024 * 1024;  // 1 MB
static const size_t kMaxBufferSize = 64 * 1024 * 1024;  // 64 MB
41
42 // Definitions of command-line flags
43 DEFINE_string(dictionary, "",
44 "File containing dictionary data (required)");
45 DEFINE_string(target, "",
46 "Target file (default is stdin for encode, stdout for decode");
47 DEFINE_string(delta, "",
48 "Encoded delta file (default is stdout for encode, "
49 "stdin for decode");
50 DEFINE_uint64(buffersize, kDefaultBufferSize,
51 "Buffer size for reading input file");
52 DEFINE_bool(checksum, false,
53 "Include an Adler32 checksum of the target data when encoding");
54 DEFINE_bool(interleaved, false, "Use interleaved format");
55 DEFINE_bool(stats, false, "Report compression percentage");
56 DEFINE_bool(target_matches, false, "Find duplicate strings in target data"
57 " as well as dictionary data");
58
// Usage text handed to google::SetUsageMessage() in main().  It names the
// accepted command words and says what each pair of synonyms does.
static const char* const kUsageString =
    " {encode | delta | decode | patch }[ <options> ]\n"
    "encode or delta: "
    "create delta file from dictionary and target file\n"
    "decode or patch: "
    "reconstruct target file from dictionary and delta file";
63
64 namespace open_vcdiff {
65
66 class VCDiffFileBasedCoder {
67 public:
68 VCDiffFileBasedCoder();
69 ~VCDiffFileBasedCoder();
70
71 // Once the command-line arguments have been parsed, these functions
72 // will use the supplied options to carry out a file-based encode
73 // or decode operation.
74 bool Encode();
75 bool Decode();
76 bool DecodeAndCompare(); // for "vcdiff test"; compare target with original
77
78 private:
79 // Determines the size of the file. The given file must be an input file
80 // opened for reading only, not an input stream such as stdin. The function
81 // returns true and populates file_size if successful; otherwise, it returns
82 // false.
83 static bool FileSize(FILE* file, size_t* file_size);
84
85 // Opens a file for incremental reading. file_name is the name of the file
86 // to be opened. file_type should be a descriptive name (like "target") for
87 // use in log messages. If successful, returns true and sets *file to a
88 // valid input file, *buffer to a region of memory allocated using malloc()
89 // (so the caller must release it using free()), and buffer_size to the size
90 // of the buffer, which will not be larger than the size of the file, and
91 // will not be smaller than the --buffersize option. If the function fails,
92 // it outputs a log message and returns false.
93 bool OpenFileForReading(const string& file_name,
94 const char* file_type,
95 FILE** file,
96 std::vector<char>* buffer);
97
98 // Opens the dictionary file and reads it into a newly allocated buffer.
99 // If successful, returns true and populates dictionary_ with the dictionary
100 // contents; otherwise, returns false.
101 bool OpenDictionary();
102
103 // Opens the input file (the delta or target file) for reading.
104 // Allocates space for the input buffer. If successful,
105 // input_file_ will be valid and input_buffer_ will be allocated.
106 bool OpenInputFile() {
107 return OpenFileForReading(input_file_name_,
108 input_file_type_,
109 &input_file_,
110 &input_buffer_);
111 }
112
113 // Opens the output file (the target or delta file) for writing.
114 // If successful, output_file_ will be valid.
115 bool OpenOutputFile();
116
117 // Opens the output file (the target file) for comparison against the decoded
118 // output when using "vcdiff test".
119 bool OpenOutputFileForCompare() {
120 return OpenFileForReading(output_file_name_,
121 output_file_type_,
122 &output_file_,
123 &compare_buffer_);
124 }
125
126 // Reads as much input data as possible from the input file
127 // into input_buffer_. If successful, returns true and sets *bytes_read
128 // to the number of bytes read into input_buffer_. If an error occurs,
129 // writes an error log message and returns false.
130 bool ReadInput(size_t* bytes_read);
131
132 // Writes the contents of output to output_file_. If successful, returns
133 // true. If an error occurs, writes an error log message and returns false.
134 bool WriteOutput(const string& output);
135
136 // Reads a number of bytes from output_file_ equal to the size of output,
137 // and compares to make sure they match the contents of output. If the bytes
138 // do not match, or if end of file is reached before the expected number of
139 // bytes have been read, or a read error occurs, the function returns false;
140 // otherwise, returns true.
141 bool CompareOutput(const string& output);
142
143 // Dictionary contents. The entire dictionary file will be read into memory.
144 std::vector<char> dictionary_;
145
146 std::auto_ptr<open_vcdiff::HashedDictionary> hashed_dictionary_;
147
148 // These should be set to either "delta" or "target". They are only
149 // used in log messages such as "Error opening delta file..."
150 const char* input_file_type_;
151 const char* output_file_type_;
152
153 // The filenames used for input and output. Will be empty if stdin
154 // or stdout is being used.
155 string input_file_name_;
156 string output_file_name_;
157
158 // stdio-style file handles for the input and output files and the dictionary.
159 // When encoding, input_file_ is the target file and output_file_ is the delta
160 // file; when decoding, the reverse is true. The dictionary is always read
161 // from a file rather than from standard input.
162 FILE* input_file_;
163 FILE* output_file_;
164
165 // A memory buffer used to load the input file into memory. If the input
166 // comes from stdin because no input file was specified, then the size of
167 // input_buffer_ will be the value specified by the --buffersize option.
168 // If the input comes from a file, then the buffer will be allocated to match
169 // the file size, if possible. However, the buffer will not exceed
170 // kMaxBufferSize bytes in length, unless the user specifies the --buffersize
171 // option to override that limit.
172 std::vector<char> input_buffer_;
173
174 // A memory buffer used to load the output file into memory for comparison
175 // if "vcdiff test" is specified.
176 std::vector<char> compare_buffer_;
177
178 // Making these private avoids implicit copy constructor & assignment operator
179 VCDiffFileBasedCoder(const VCDiffFileBasedCoder&); // NOLINT
180 void operator=(const VCDiffFileBasedCoder&);
181 };
182
183 inline VCDiffFileBasedCoder::VCDiffFileBasedCoder()
184 : input_file_type_(""),
185 output_file_type_(""),
186 input_file_(NULL),
187 output_file_(NULL) { }
188
189 VCDiffFileBasedCoder::~VCDiffFileBasedCoder() {
190 if (input_file_ && (input_file_ != stdin)) {
191 fclose(input_file_);
192 input_file_ = NULL;
193 }
194 if (output_file_ && (output_file_ != stdout)) {
195 fclose(output_file_);
196 output_file_ = NULL;
197 }
198 }
199
200 bool VCDiffFileBasedCoder::FileSize(FILE* file, size_t* file_size) {
201 long initial_position = ftell(file);
202 if (fseek(file, 0, SEEK_END) != 0) {
203 return false;
204 }
205 *file_size = static_cast<size_t>(ftell(file));
206 if (fseek(file, initial_position, SEEK_SET) != 0) {
207 return false;
208 }
209 return true;
210 }
211
212 bool VCDiffFileBasedCoder::OpenDictionary() {
213 assert(dictionary_.empty());
214 assert(!FLAGS_dictionary.empty());
215 FILE* dictionary_file = fopen(FLAGS_dictionary.c_str(), "rb");
216 if (!dictionary_file) {
217 LOG(ERROR) << "Error opening dictionary file '" << FLAGS_dictionary
218 << "': " << strerror(errno) << LOG_ENDL;
219 return false;
220 }
221 size_t dictionary_size = 0U;
222 if (!FileSize(dictionary_file, &dictionary_size)) {
223 LOG(ERROR) << "Error finding size of dictionary file '" << FLAGS_dictionary
224 << "': " << strerror(errno) << LOG_ENDL;
225 return false;
226 }
227 dictionary_.resize(dictionary_size);
228 if (fread(&dictionary_[0], 1, dictionary_size, dictionary_file)
229 != dictionary_size) {
230 LOG(ERROR) << "Unable to read dictionary file '" << FLAGS_dictionary
231 << "': " << strerror(errno) << LOG_ENDL;
232 fclose(dictionary_file);
233 dictionary_.clear();
234 return false;
235 }
236 fclose(dictionary_file);
237 return true;
238 }
239
240 bool VCDiffFileBasedCoder::OpenFileForReading(const string& file_name,
241 const char* file_type,
242 FILE** file,
243 std::vector<char>* buffer) {
244 assert(buffer->empty());
245 size_t buffer_size = 0U;
246 if (!*file && file_name.empty()) {
247 *file = stdin;
248 buffer_size = static_cast<size_t>(FLAGS_buffersize);
249 } else {
250 if (!*file) {
251 *file = fopen(file_name.c_str(), "rb");
252 if (!*file) {
253 LOG(ERROR) << "Error opening " << file_type << " file '"
254 << file_name << "': " << strerror(errno) << LOG_ENDL;
255 return false;
256 }
257 }
258 size_t file_size = 0U;
259 if (!FileSize(*file, &file_size)) {
260 LOG(ERROR) << "Error finding size of " << file_type << " file '"
261 << file_name << "': " << strerror(errno) << LOG_ENDL;
262 return false;
263 }
264 buffer_size = kMaxBufferSize;
265 if (FLAGS_buffersize > buffer_size) {
266 buffer_size = static_cast<size_t>(FLAGS_buffersize);
267 }
268 if (file_size < buffer_size) {
269 // Allocate just enough memory to store the entire file
270 buffer_size = file_size;
271 }
272 }
273 buffer->resize(buffer_size);
274 return true;
275 }
276
277 // Opens the output file for streamed read operations using the
278 // standard C I/O library, i.e., fopen(), fwrite(), fclose().
279 // No output buffer is allocated because the encoded/decoded output
280 // is constructed progressively using a std::string object
281 // whose buffer is resized as needed.
282 bool VCDiffFileBasedCoder::OpenOutputFile() {
283 if (output_file_name_.empty()) {
284 output_file_ = stdout;
285 } else {
286 output_file_ = fopen(output_file_name_.c_str(), "wb");
287 if (!output_file_) {
288 LOG(ERROR) << "Error opening " << output_file_type_ << " file '"
289 << output_file_name_
290 << "': " << strerror(errno) << LOG_ENDL;
291 return false;
292 }
293 }
294 return true;
295 }
296
297 bool VCDiffFileBasedCoder::ReadInput(size_t* bytes_read) {
298 // Read from file or stdin
299 *bytes_read = fread(&input_buffer_[0], 1, input_buffer_.size(), input_file_);
300 if (ferror(input_file_)) {
301 LOG(ERROR) << "Error reading from " << input_file_type_ << " file '"
302 << input_file_name_
303 << "': " << strerror(errno) << LOG_ENDL;
304 return false;
305 }
306 return true;
307 }
308
309 bool VCDiffFileBasedCoder::WriteOutput(const string& output) {
310 if (!output.empty()) {
311 // Some new output has been generated and is ready to be written
312 // to the output file or to stdout.
313 fwrite(output.data(), 1, output.size(), output_file_);
314 if (ferror(output_file_)) {
315 LOG(ERROR) << "Error writing " << output.size() << " bytes to "
316 << output_file_type_ << " file '" << output_file_name_
317 << "': " << strerror(errno) << LOG_ENDL;
318 return false;
319 }
320 }
321 return true;
322 }
323
324 bool VCDiffFileBasedCoder::CompareOutput(const string& output) {
325 if (!output.empty()) {
326 size_t output_size = output.size();
327 // Some new output has been generated and is ready to be compared against
328 // the output file.
329 if (output_size > compare_buffer_.size()) {
330 compare_buffer_.resize(output_size);
331 }
332 size_t bytes_read = fread(&compare_buffer_[0],
333 1,
334 output_size,
335 output_file_);
336 if (ferror(output_file_)) {
337 LOG(ERROR) << "Error reading from " << output_file_type_ << " file '"
338 << output_file_name_ << "': " << strerror(errno) << LOG_ENDL;
339 return false;
340 }
341 if (bytes_read < output_size) {
342 LOG(ERROR) << "Decoded target is longer than original target file"
343 << LOG_ENDL;
344 return false;
345 }
346 if (output.compare(0, output_size, &compare_buffer_[0], bytes_read) != 0) {
347 LOG(ERROR) << "Original target file does not match decoded target"
348 << LOG_ENDL;
349 return false;
350 }
351 }
352 return true;
353 }
354
355 bool VCDiffFileBasedCoder::Encode() {
356 input_file_type_ = "target";
357 input_file_name_ = FLAGS_target;
358 output_file_type_ = "delta";
359 output_file_name_ = FLAGS_delta;
360 if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFile()) {
361 return false;
362 }
363 hashed_dictionary_.reset(
364 new open_vcdiff::HashedDictionary(&dictionary_[0], dictionary_.size()));
365 if (!hashed_dictionary_->Init()) {
366 LOG(ERROR) << "Error initializing hashed dictionary" << LOG_ENDL;
367 return false;
368 }
369 VCDiffFormatExtensionFlags format_flags = open_vcdiff::VCD_STANDARD_FORMAT;
370 if (FLAGS_interleaved) {
371 format_flags |= open_vcdiff::VCD_FORMAT_INTERLEAVED;
372 }
373 if (FLAGS_checksum) {
374 format_flags |= open_vcdiff::VCD_FORMAT_CHECKSUM;
375 }
376 open_vcdiff::VCDiffStreamingEncoder encoder(hashed_dictionary_.get(),
377 format_flags,
378 FLAGS_target_matches);
379 string output;
380 size_t input_size = 0;
381 size_t output_size = 0;
382 {
383 if (!encoder.StartEncoding(&output)) {
384 LOG(ERROR) << "Error during encoder initialization" << LOG_ENDL;
385 return false;
386 }
387 }
388 do {
389 size_t bytes_read = 0;
390 if (!WriteOutput(output) || !ReadInput(&bytes_read)) {
391 return false;
392 }
393 output_size += output.size();
394 output.clear();
395 if (bytes_read > 0) {
396 input_size += bytes_read;
397 if (!encoder.EncodeChunk(&input_buffer_[0], bytes_read, &output)) {
398 LOG(ERROR) << "Error trying to encode data chunk of length "
399 << bytes_read << LOG_ENDL;
400 return false;
401 }
402 }
403 } while (!feof(input_file_));
404 encoder.FinishEncoding(&output);
405 if (!WriteOutput(output)) {
406 return false;
407 }
408 output_size += output.size();
409 output.clear();
410 if (FLAGS_stats && (input_size > 0)) {
411 LOG(INFO) << "Original size: " << input_size
412 << "\tCompressed size: " << output_size << " ("
413 << ((static_cast<double>(output_size) / input_size) * 100)
414 << "% of original)" << LOG_ENDL;
415 }
416 return true;
417 }
418
419 bool VCDiffFileBasedCoder::Decode() {
420 input_file_type_ = "delta";
421 input_file_name_ = FLAGS_delta;
422 output_file_type_ = "target";
423 output_file_name_ = FLAGS_target;
424 if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFile()) {
425 return false;
426 }
427
428 open_vcdiff::VCDiffStreamingDecoder decoder;
429 string output;
430 size_t input_size = 0;
431 size_t output_size = 0;
432 decoder.StartDecoding(&dictionary_[0], dictionary_.size());
433
434 do {
435 size_t bytes_read = 0;
436 if (!ReadInput(&bytes_read)) {
437 return false;
438 }
439 if (bytes_read > 0) {
440 input_size += bytes_read;
441 if (!decoder.DecodeChunk(&input_buffer_[0], bytes_read, &output)) {
442 LOG(ERROR) << "Error trying to decode data chunk of length "
443 << bytes_read << LOG_ENDL;
444 return false;
445 }
446 }
447 if (!WriteOutput(output)) {
448 return false;
449 }
450 output_size += output.size();
451 output.clear();
452 } while (!feof(input_file_));
453 if (!decoder.FinishDecoding()) {
454 LOG(ERROR) << "Decode error; '" << FLAGS_delta
455 << " may not be a valid VCDIFF delta file" << LOG_ENDL;
456 return false;
457 }
458 if (!WriteOutput(output)) {
459 return false;
460 }
461 output_size += output.size();
462 output.clear();
463 if (FLAGS_stats && (output_size > 0)) {
464 LOG(INFO) << "Decompressed size: " << output_size
465 << "\tCompressed size: " << input_size << " ("
466 << ((static_cast<double>(input_size) / output_size) * 100)
467 << "% of original)" << LOG_ENDL;
468 }
469 return true;
470 }
471
472 bool VCDiffFileBasedCoder::DecodeAndCompare() {
473 input_file_type_ = "delta";
474 input_file_name_ = FLAGS_delta;
475 output_file_type_ = "target";
476 output_file_name_ = FLAGS_target;
477 if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFileForCompare()) {
478 return false;
479 }
480
481 open_vcdiff::VCDiffStreamingDecoder decoder;
482 string output;
483 size_t input_size = 0;
484 size_t output_size = 0;
485 decoder.StartDecoding(&dictionary_[0], dictionary_.size());
486
487 do {
488 size_t bytes_read = 0;
489 if (!ReadInput(&bytes_read)) {
490 return false;
491 }
492 if (bytes_read > 0) {
493 input_size += bytes_read;
494 if (!decoder.DecodeChunk(&input_buffer_[0], bytes_read, &output)) {
495 LOG(ERROR) << "Error trying to decode data chunk of length "
496 << bytes_read << LOG_ENDL;
497 return false;
498 }
499 }
500 if (!CompareOutput(output)) {
501 return false;
502 }
503 output_size += output.size();
504 output.clear();
505 } while (!feof(input_file_));
506 if (!decoder.FinishDecoding()) {
507 LOG(ERROR) << "Decode error; '" << FLAGS_delta
508 << " may not be a valid VCDIFF delta file" << LOG_ENDL;
509 return false;
510 }
511 if (!CompareOutput(output)) {
512 return false;
513 }
514 output_size += output.size();
515 output.clear();
516 if (fgetc(output_file_) != EOF) {
517 LOG(ERROR) << "Decoded target is shorter than original target file"
518 << LOG_ENDL;
519 return false;
520 }
521 if (ferror(output_file_)) {
522 LOG(ERROR) << "Error reading end-of-file indicator from target file"
523 << LOG_ENDL;
524 return false;
525 }
526 if (FLAGS_stats && (output_size > 0)) {
527 LOG(INFO) << "Decompressed size: " << output_size
528 << "\tCompressed size: " << input_size << " ("
529 << ((static_cast<double>(input_size) / output_size) * 100)
530 << "% of original)" << LOG_ENDL;
531 }
532 return true;
533 }
534
535 } // namespace open_vcdiff
536
537 int main(int argc, char** argv) {
538 const char* const command_name = argv[0];
539 google::SetUsageMessage(kUsageString);
540 google::ParseCommandLineFlags(&argc, &argv, true);
541 if (argc != 2) {
542 LOG(ERROR) << command_name << ": Must specify exactly one command option"
543 << LOG_ENDL;
544 ShowUsageWithFlagsRestrict(command_name, "vcdiff");
545 return 1;
546 }
547 const char* const command_option = argv[1];
548 if (FLAGS_dictionary.empty()) {
549 LOG(ERROR) << command_name << " " << command_option
550 << ": Must specify --dictionary <file-name>" << LOG_ENDL;
551 ShowUsageWithFlagsRestrict(command_name, "vcdiff");
552 return 1;
553 }
554 if (!GetCommandLineFlagInfoOrDie("buffersize").is_default &&
555 (FLAGS_buffersize == 0)) {
556 LOG(ERROR) << command_name << ": Option --buffersize cannot be 0"
557 << LOG_ENDL;
558 ShowUsageWithFlagsRestrict(command_name, "vcdiff");
559 return 1;
560 }
561 if ((strcmp(command_option, "encode") == 0) ||
562 (strcmp(command_option, "delta") == 0)) {
563 open_vcdiff::VCDiffFileBasedCoder coder;
564 if (!coder.Encode()) {
565 return 1;
566 }
567 // The destructor for VCDiffFileBasedCoder will clean up the open files
568 // and allocated memory.
569 } else if ((strcmp(command_option, "decode") == 0) ||
570 (strcmp(command_option, "patch") == 0)) {
571 open_vcdiff::VCDiffFileBasedCoder coder;
572 if (!coder.Decode()) {
573 return 1;
574 }
575 } else if ((strcmp(command_option, "test") == 0)) {
576 // "vcdiff test" does not appear in the usage string, but can be
577 // used for debugging. It encodes, then decodes, then compares the result
578 // with the original target. It expects the same arguments as
579 // "vcdiff encode", with the additional requirement that the --target
580 // and --delta file arguments must be specified, rather than using stdin
581 // or stdout. It produces a delta file just as for "vcdiff encode".
582 if (FLAGS_target.empty() || FLAGS_delta.empty()) {
583 LOG(ERROR) << command_name
584 << " test: Must specify both --target <file-name>"
585 " and --delta <file-name>" << LOG_ENDL;
586 return 1;
587 }
588 const string original_target(FLAGS_target);
589 // Put coder into a separate scope.
590 {
591 open_vcdiff::VCDiffFileBasedCoder coder;
592 if (!coder.Encode()) {
593 return 1;
594 }
595 }
596 {
597 open_vcdiff::VCDiffFileBasedCoder coder;
598 if (!coder.DecodeAndCompare()) {
599 return 1;
600 }
601 }
602 } else {
603 LOG(ERROR) << command_name << ": Unrecognized command option "
604 << command_option << LOG_ENDL;
605 ShowUsageWithFlagsRestrict(command_name, "vcdiff");
606 return 1;
607 }
608 return 0;
609 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698