Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(298)

Side by Side Diff: courgette/third_party/bsdiff_create.cc

Issue 115435: Store 'diff' bytes by run-length encoding zeros.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « courgette/third_party/bsdiff_apply.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 bsdiff.c -- Binary patch generator. 2 bsdiff.c -- Binary patch generator.
3 3
4 Copyright 2003 Colin Percival 4 Copyright 2003 Colin Percival
5 5
6 For the terms under which this work may be distributed, please see 6 For the terms under which this work may be distributed, please see
7 the adjoining file "LICENSE". 7 the adjoining file "LICENSE".
8 8
9 ChangeLog: 9 ChangeLog:
10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
(...skipping 24 matching lines...) Expand all
35 namespace courgette { 35 namespace courgette {
36 36
37 // ------------------------------------------------------------------------ 37 // ------------------------------------------------------------------------
38 // 38 //
39 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the 39 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the
40 // code formatting and variable names. The only changes from the original are 40 // code formatting and variable names. The only changes from the original are
41 // replacing tabs with spaces, indentation, and using 'const'. 41 // replacing tabs with spaces, indentation, and using 'const'.
42 // 42 //
43 // The code appears to be a rewritten version of the suffix array algorithm 43 // The code appears to be a rewritten version of the suffix array algorithm
44 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko 44 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko
45 // Sadakane, special-cased for bytes. 45 // Sadakane, special cased for bytes.
46 46
47 static void 47 static void
48 split(int *I,int *V,int start,int len,int h) 48 split(int *I,int *V,int start,int len,int h)
49 { 49 {
50 int i,j,k,x,tmp,jj,kk; 50 int i,j,k,x,tmp,jj,kk;
51 51
52 if(len<16) { 52 if(len<16) {
53 for(k=start;k<start+len;k+=j) { 53 for(k=start;k<start+len;k+=j) {
54 j=1;x=V[I[k]+h]; 54 j=1;x=V[I[k]+h];
55 for(i=1;k+i<start+len;i++) { 55 for(i=1;k+i<start+len;i++) {
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after
184 } 184 }
185 185
186 // End of 'verbatim' code. 186 // End of 'verbatim' code.
187 // ------------------------------------------------------------------------ 187 // ------------------------------------------------------------------------
188 188
189 static void WriteHeader(SinkStream* stream, MBSPatchHeader* header) { 189 static void WriteHeader(SinkStream* stream, MBSPatchHeader* header) {
190 stream->Write(header->tag, sizeof(header->tag)); 190 stream->Write(header->tag, sizeof(header->tag));
191 stream->WriteVarint32(header->slen); 191 stream->WriteVarint32(header->slen);
192 stream->WriteVarint32(header->scrc32); 192 stream->WriteVarint32(header->scrc32);
193 stream->WriteVarint32(header->dlen); 193 stream->WriteVarint32(header->dlen);
194 stream->WriteVarint32(header->cblen);
195 stream->WriteVarint32(header->difflen);
196 stream->WriteVarint32(header->extralen);
197 } 194 }
198 195
199 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, 196 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
200 SourceStream* new_stream, 197 SourceStream* new_stream,
201 SinkStream* patch_stream) 198 SinkStream* patch_stream)
202 { 199 {
203 base::Time start_bsdiff_time = base::Time::Now(); 200 base::Time start_bsdiff_time = base::Time::Now();
204 LOG(INFO) << "Start bsdiff"; 201 LOG(INFO) << "Start bsdiff";
205 size_t initial_patch_stream_length = patch_stream->Length(); 202 size_t initial_patch_stream_length = patch_stream->Length();
206 203
204 SinkStreamSet patch_streams;
205 SinkStream* control_stream_copy_counts = patch_streams.stream(0);
206 SinkStream* control_stream_extra_counts = patch_streams.stream(1);
207 SinkStream* control_stream_seeks = patch_streams.stream(2);
208 SinkStream* diff_skips = patch_streams.stream(3);
209 SinkStream* diff_bytes = patch_streams.stream(4);
210 SinkStream* extra_bytes = patch_streams.stream(5);
211
207 const uint8* old = old_stream->Buffer(); 212 const uint8* old = old_stream->Buffer();
208 const int oldsize = old_stream->Remaining(); 213 const int oldsize = old_stream->Remaining();
209 214
215 uint32 pending_diff_zeros = 0;
216
210 scoped_array<int> I(new(std::nothrow) int[oldsize + 1]); 217 scoped_array<int> I(new(std::nothrow) int[oldsize + 1]);
211 scoped_array<int> V(new(std::nothrow) int[oldsize + 1]); 218 scoped_array<int> V(new(std::nothrow) int[oldsize + 1]);
212 if (I == NULL || V == NULL) 219 if (I == NULL || V == NULL)
213 return MEM_ERROR; 220 return MEM_ERROR;
214 221
215 base::Time q_start_time = base::Time::Now(); 222 base::Time q_start_time = base::Time::Now();
216 qsufsort(I.get(), V.get(), old, oldsize); 223 qsufsort(I.get(), V.get(), old, oldsize);
217 LOG(INFO) << " done qsufsort " 224 LOG(INFO) << " done qsufsort "
218 << (base::Time::Now() - q_start_time).InSecondsF(); 225 << (base::Time::Now() - q_start_time).InSecondsF();
219 V.reset(); 226 V.reset();
220 227
221 const uint8* newbuf = new_stream->Buffer(); 228 const uint8* newbuf = new_stream->Buffer();
222 const int newsize = new_stream->Remaining(); 229 const int newsize = new_stream->Remaining();
223 230
224 // Allocate newsize+1 bytes instead of newsize bytes to ensure that we never
225 // try to malloc(0) and get a NULL pointer.
226
227 scoped_array<uint8> diff_bytes_array(new(std::nothrow) uint8[newsize + 1]);
228 scoped_array<uint8> extra_bytes_array(new(std::nothrow) uint8[newsize + 1]);
229 if (diff_bytes_array == NULL || extra_bytes_array == NULL)
230 return MEM_ERROR;
231
232 uint8* diff_bytes = diff_bytes_array.get();
233 uint8* extra_bytes = extra_bytes_array.get();
234 int control_length = 0; 231 int control_length = 0;
235 int diff_bytes_length = 0; 232 int diff_bytes_length = 0;
236 int diff_bytes_nonzero = 0; 233 int diff_bytes_nonzero = 0;
237 int extra_bytes_length = 0; 234 int extra_bytes_length = 0;
238 int eblen = 0; 235 int eblen = 0;
239 236
240 SinkStream control_stream;
241
242 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is 237 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is
243 // the number of bytes to copy from the old file (possibly with mistakes), 238 // the number of bytes to copy from the old file (possibly with mistakes),
244 // 'extra' is the number of bytes to copy from a stream of fresh bytes, and 239 // 'extra' is the number of bytes to copy from a stream of fresh bytes, and
245 // 'seek' is an offset to move to the position to copy for the next triple. 240 // 'seek' is an offset to move to the position to copy for the next triple.
246 // 241 //
247 // The invariant at the top of this loop is that we are committed to emitting 242 // The invariant at the top of this loop is that we are committed to emitting
248 // a triple for the part of |newbuf| surrounding a 'seed' match near 243 // a triple for the part of |newbuf| surrounding a 'seed' match near
249 // |lastscan|. We are searching for a second match that will be the 'seed' of 244 // |lastscan|. We are searching for a second match that will be the 'seed' of
250 // the next triple. As we scan through |newbuf|, one of four things can 245 // the next triple. As we scan through |newbuf|, one of four things can
251 // happen at the current position |scan|: 246 // happen at the current position |scan|:
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
357 if (newbuf[scan - lenb + i] == old[pos - lenb + i]) score--; 352 if (newbuf[scan - lenb + i] == old[pos - lenb + i]) score--;
358 if (score > Ss) { Ss = score; lens = i + 1; } 353 if (score > Ss) { Ss = score; lens = i + 1; }
359 } 354 }
360 355
361 lenf += lens - overlap; 356 lenf += lens - overlap;
362 lenb -= lens; 357 lenb -= lens;
363 }; 358 };
364 359
365 for (int i = 0; i < lenf; i++) { 360 for (int i = 0; i < lenf; i++) {
366 uint8 diff_byte = newbuf[lastscan + i] - old[lastpos + i]; 361 uint8 diff_byte = newbuf[lastscan + i] - old[lastpos + i];
367 diff_bytes[diff_bytes_length + i] = diff_byte; 362 if (diff_byte) {
368 if (diff_byte)
369 ++diff_bytes_nonzero; 363 ++diff_bytes_nonzero;
364 diff_skips->WriteVarint32(pending_diff_zeros);
365 pending_diff_zeros = 0;
366 diff_bytes->Write(&diff_byte, 1);
367 } else {
368 ++pending_diff_zeros;
369 }
370 } 370 }
371 int gap = (scan - lenb) - (lastscan + lenf); 371 int gap = (scan - lenb) - (lastscan + lenf);
372 for (int i = 0; i < gap; i++) 372 for (int i = 0; i < gap; i++)
373 extra_bytes[extra_bytes_length + i] = newbuf[lastscan + lenf + i]; 373 extra_bytes->Write(&newbuf[lastscan + lenf + i], 1);
374 374
375 diff_bytes_length += lenf; 375 diff_bytes_length += lenf;
376 extra_bytes_length += gap; 376 extra_bytes_length += gap;
377 377
378 uint32 copy_count = lenf; 378 uint32 copy_count = lenf;
379 uint32 extra_count = gap; 379 uint32 extra_count = gap;
380 int32 seek_adjustment = ((pos - lenb) - (lastpos + lenf)); 380 int32 seek_adjustment = ((pos - lenb) - (lastpos + lenf));
381 381
382 control_stream.WriteVarint32(copy_count); 382 control_stream_copy_counts->WriteVarint32(copy_count);
383 control_stream.WriteVarint32(extra_count); 383 control_stream_extra_counts->WriteVarint32(extra_count);
384 control_stream.WriteVarint32Signed(seek_adjustment); 384 control_stream_seeks->WriteVarint32Signed(seek_adjustment);
385 ++control_length; 385 ++control_length;
386 #ifdef DEBUG_bsmedberg 386 #ifdef DEBUG_bsmedberg
387 LOG(INFO) << StringPrintf( 387 LOG(INFO) << StringPrintf(
388 "Writing a block: copy: %-8u extra: %-8u seek: %+-9d", 388 "Writing a block: copy: %-8u extra: %-8u seek: %+-9d",
389 copy_count, extra_count, seek_adjustment); 389 copy_count, extra_count, seek_adjustment);
390 #endif 390 #endif
391 391
392 lastscan = scan - lenb; // Include the backward extension in seed. 392 lastscan = scan - lenb; // Include the backward extension in seed.
393 lastpos = pos - lenb; // ditto. 393 lastpos = pos - lenb; // ditto.
394 lastoffset = lastpos - lastscan; 394 lastoffset = lastpos - lastscan;
395 } 395 }
396 } 396 }
397 397
398 diff_skips->WriteVarint32(pending_diff_zeros);
399
398 I.reset(); 400 I.reset();
399 401
400 MBSPatchHeader header; 402 MBSPatchHeader header;
401 // The string will have a null terminator that we don't use, hence '-1'. 403 // The string will have a null terminator that we don't use, hence '-1'.
402 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag), 404 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag),
403 MBS_PATCH_HEADER_TAG_must_match_header_field_size); 405 MBS_PATCH_HEADER_TAG_must_match_header_field_size);
404 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag)); 406 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag));
405 header.slen = oldsize; 407 header.slen = oldsize;
406 header.scrc32 = CalculateCrc(old, oldsize); 408 header.scrc32 = CalculateCrc(old, oldsize);
407 header.dlen = newsize; 409 header.dlen = newsize;
408 header.cblen = control_stream.Length();
409 header.difflen = diff_bytes_length;
410 header.extralen = extra_bytes_length;
411 410
412 WriteHeader(patch_stream, &header); 411 WriteHeader(patch_stream, &header);
413 412
414 patch_stream->Append(&control_stream); 413 size_t diff_skips_length = diff_skips->Length();
415 patch_stream->Write(diff_bytes, diff_bytes_length); 414 patch_streams.CopyTo(patch_stream);
416 patch_stream->Write(extra_bytes, extra_bytes_length);
417 415
418 LOG(INFO) << "Control tuples: " << control_length 416 LOG(INFO) << "Control tuples: " << control_length
419 << " copy bytes: " << diff_bytes_length 417 << " copy bytes: " << diff_bytes_length
420 << " mistakes: " << diff_bytes_nonzero 418 << " mistakes: " << diff_bytes_nonzero
419 << " (skips: " << diff_skips_length << ")"
421 << " extra bytes: " << extra_bytes_length; 420 << " extra bytes: " << extra_bytes_length;
422 421
423 LOG(INFO) << "Uncompressed bsdiff patch size " 422 LOG(INFO) << "Uncompressed bsdiff patch size "
424 << patch_stream->Length() - initial_patch_stream_length; 423 << patch_stream->Length() - initial_patch_stream_length;
425 424
426 LOG(INFO) << "End bsdiff " 425 LOG(INFO) << "End bsdiff "
427 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); 426 << (base::Time::Now() - start_bsdiff_time).InSecondsF();
428 427
429 return OK; 428 return OK;
430 } 429 }
431 430
432 } // namespace 431 } // namespace
OLDNEW
« no previous file with comments | « courgette/third_party/bsdiff_apply.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698