OLD | NEW |
1 /* | 1 /* |
2 bsdiff.c -- Binary patch generator. | 2 bsdiff.c -- Binary patch generator. |
3 | 3 |
4 Copyright 2003 Colin Percival | 4 Copyright 2003 Colin Percival |
5 | 5 |
6 For the terms under which this work may be distributed, please see | 6 For the terms under which this work may be distributed, please see |
7 the adjoining file "LICENSE". | 7 the adjoining file "LICENSE". |
8 | 8 |
9 ChangeLog: | 9 ChangeLog: |
10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit | 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit |
(...skipping 24 matching lines...) Expand all Loading... |
35 namespace courgette { | 35 namespace courgette { |
36 | 36 |
37 // ------------------------------------------------------------------------ | 37 // ------------------------------------------------------------------------ |
38 // | 38 // |
39 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the | 39 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the |
40 // code formatting and variable names. The only changes from the original are | 40 // code formatting and variable names. The only changes from the original are |
41 // replacing tabs with spaces, indentation, and using 'const'. | 41 // replacing tabs with spaces, indentation, and using 'const'. |
42 // | 42 // |
43 // The code appears to be a rewritten version of the suffix array algorithm | 43 // The code appears to be a rewritten version of the suffix array algorithm |
44 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko | 44 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko |
45 // Sadakane, special-cased for bytes. | 45 // Sadakane, special cased for bytes. |
46 | 46 |
47 static void | 47 static void |
48 split(int *I,int *V,int start,int len,int h) | 48 split(int *I,int *V,int start,int len,int h) |
49 { | 49 { |
50 int i,j,k,x,tmp,jj,kk; | 50 int i,j,k,x,tmp,jj,kk; |
51 | 51 |
52 if(len<16) { | 52 if(len<16) { |
53 for(k=start;k<start+len;k+=j) { | 53 for(k=start;k<start+len;k+=j) { |
54 j=1;x=V[I[k]+h]; | 54 j=1;x=V[I[k]+h]; |
55 for(i=1;k+i<start+len;i++) { | 55 for(i=1;k+i<start+len;i++) { |
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
184 } | 184 } |
185 | 185 |
186 // End of 'verbatim' code. | 186 // End of 'verbatim' code. |
187 // ------------------------------------------------------------------------ | 187 // ------------------------------------------------------------------------ |
188 | 188 |
189 static void WriteHeader(SinkStream* stream, MBSPatchHeader* header) { | 189 static void WriteHeader(SinkStream* stream, MBSPatchHeader* header) { |
190 stream->Write(header->tag, sizeof(header->tag)); | 190 stream->Write(header->tag, sizeof(header->tag)); |
191 stream->WriteVarint32(header->slen); | 191 stream->WriteVarint32(header->slen); |
192 stream->WriteVarint32(header->scrc32); | 192 stream->WriteVarint32(header->scrc32); |
193 stream->WriteVarint32(header->dlen); | 193 stream->WriteVarint32(header->dlen); |
194 stream->WriteVarint32(header->cblen); | |
195 stream->WriteVarint32(header->difflen); | |
196 stream->WriteVarint32(header->extralen); | |
197 } | 194 } |
198 | 195 |
199 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, | 196 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, |
200 SourceStream* new_stream, | 197 SourceStream* new_stream, |
201 SinkStream* patch_stream) | 198 SinkStream* patch_stream) |
202 { | 199 { |
203 base::Time start_bsdiff_time = base::Time::Now(); | 200 base::Time start_bsdiff_time = base::Time::Now(); |
204 LOG(INFO) << "Start bsdiff"; | 201 LOG(INFO) << "Start bsdiff"; |
205 size_t initial_patch_stream_length = patch_stream->Length(); | 202 size_t initial_patch_stream_length = patch_stream->Length(); |
206 | 203 |
| 204 SinkStreamSet patch_streams; |
| 205 SinkStream* control_stream_copy_counts = patch_streams.stream(0); |
| 206 SinkStream* control_stream_extra_counts = patch_streams.stream(1); |
| 207 SinkStream* control_stream_seeks = patch_streams.stream(2); |
| 208 SinkStream* diff_skips = patch_streams.stream(3); |
| 209 SinkStream* diff_bytes = patch_streams.stream(4); |
| 210 SinkStream* extra_bytes = patch_streams.stream(5); |
| 211 |
207 const uint8* old = old_stream->Buffer(); | 212 const uint8* old = old_stream->Buffer(); |
208 const int oldsize = old_stream->Remaining(); | 213 const int oldsize = old_stream->Remaining(); |
209 | 214 |
| 215 uint32 pending_diff_zeros = 0; |
| 216 |
210 scoped_array<int> I(new(std::nothrow) int[oldsize + 1]); | 217 scoped_array<int> I(new(std::nothrow) int[oldsize + 1]); |
211 scoped_array<int> V(new(std::nothrow) int[oldsize + 1]); | 218 scoped_array<int> V(new(std::nothrow) int[oldsize + 1]); |
212 if (I == NULL || V == NULL) | 219 if (I == NULL || V == NULL) |
213 return MEM_ERROR; | 220 return MEM_ERROR; |
214 | 221 |
215 base::Time q_start_time = base::Time::Now(); | 222 base::Time q_start_time = base::Time::Now(); |
216 qsufsort(I.get(), V.get(), old, oldsize); | 223 qsufsort(I.get(), V.get(), old, oldsize); |
217 LOG(INFO) << " done qsufsort " | 224 LOG(INFO) << " done qsufsort " |
218 << (base::Time::Now() - q_start_time).InSecondsF(); | 225 << (base::Time::Now() - q_start_time).InSecondsF(); |
219 V.reset(); | 226 V.reset(); |
220 | 227 |
221 const uint8* newbuf = new_stream->Buffer(); | 228 const uint8* newbuf = new_stream->Buffer(); |
222 const int newsize = new_stream->Remaining(); | 229 const int newsize = new_stream->Remaining(); |
223 | 230 |
224 // Allocate newsize+1 bytes instead of newsize bytes to ensure that we never | |
225 // try to malloc(0) and get a NULL pointer. | |
226 | |
227 scoped_array<uint8> diff_bytes_array(new(std::nothrow) uint8[newsize + 1]); | |
228 scoped_array<uint8> extra_bytes_array(new(std::nothrow) uint8[newsize + 1]); | |
229 if (diff_bytes_array == NULL || extra_bytes_array == NULL) | |
230 return MEM_ERROR; | |
231 | |
232 uint8* diff_bytes = diff_bytes_array.get(); | |
233 uint8* extra_bytes = extra_bytes_array.get(); | |
234 int control_length = 0; | 231 int control_length = 0; |
235 int diff_bytes_length = 0; | 232 int diff_bytes_length = 0; |
236 int diff_bytes_nonzero = 0; | 233 int diff_bytes_nonzero = 0; |
237 int extra_bytes_length = 0; | 234 int extra_bytes_length = 0; |
238 int eblen = 0; | 235 int eblen = 0; |
239 | 236 |
240 SinkStream control_stream; | |
241 | |
242 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is | 237 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is |
243 // the number of bytes to copy from the old file (possibly with mistakes), | 238 // the number of bytes to copy from the old file (possibly with mistakes), |
244 // 'extra' is the number of bytes to copy from a stream of fresh bytes, and | 239 // 'extra' is the number of bytes to copy from a stream of fresh bytes, and |
245 // 'seek' is an offset to move to the position to copy for the next triple. | 240 // 'seek' is an offset to move to the position to copy for the next triple. |
246 // | 241 // |
247 // The invariant at the top of this loop is that we are committed to emitting | 242 // The invariant at the top of this loop is that we are committed to emitting |
248 // a triple for the part of |newbuf| surrounding a 'seed' match near | 243 // a triple for the part of |newbuf| surrounding a 'seed' match near |
249 // |lastscan|. We are searching for a second match that will be the 'seed' of | 244 // |lastscan|. We are searching for a second match that will be the 'seed' of |
250 // the next triple. As we scan through |newbuf|, one of four things can | 245 // the next triple. As we scan through |newbuf|, one of four things can |
251 // happen at the current position |scan|: | 246 // happen at the current position |scan|: |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
357 if (newbuf[scan - lenb + i] == old[pos - lenb + i]) score--; | 352 if (newbuf[scan - lenb + i] == old[pos - lenb + i]) score--; |
358 if (score > Ss) { Ss = score; lens = i + 1; } | 353 if (score > Ss) { Ss = score; lens = i + 1; } |
359 } | 354 } |
360 | 355 |
361 lenf += lens - overlap; | 356 lenf += lens - overlap; |
362 lenb -= lens; | 357 lenb -= lens; |
363 }; | 358 }; |
364 | 359 |
365 for (int i = 0; i < lenf; i++) { | 360 for (int i = 0; i < lenf; i++) { |
366 uint8 diff_byte = newbuf[lastscan + i] - old[lastpos + i]; | 361 uint8 diff_byte = newbuf[lastscan + i] - old[lastpos + i]; |
367 diff_bytes[diff_bytes_length + i] = diff_byte; | 362 if (diff_byte) { |
368 if (diff_byte) | |
369 ++diff_bytes_nonzero; | 363 ++diff_bytes_nonzero; |
| 364 diff_skips->WriteVarint32(pending_diff_zeros); |
| 365 pending_diff_zeros = 0; |
| 366 diff_bytes->Write(&diff_byte, 1); |
| 367 } else { |
| 368 ++pending_diff_zeros; |
| 369 } |
370 } | 370 } |
371 int gap = (scan - lenb) - (lastscan + lenf); | 371 int gap = (scan - lenb) - (lastscan + lenf); |
372 for (int i = 0; i < gap; i++) | 372 for (int i = 0; i < gap; i++) |
373 extra_bytes[extra_bytes_length + i] = newbuf[lastscan + lenf + i]; | 373 extra_bytes->Write(&newbuf[lastscan + lenf + i], 1); |
374 | 374 |
375 diff_bytes_length += lenf; | 375 diff_bytes_length += lenf; |
376 extra_bytes_length += gap; | 376 extra_bytes_length += gap; |
377 | 377 |
378 uint32 copy_count = lenf; | 378 uint32 copy_count = lenf; |
379 uint32 extra_count = gap; | 379 uint32 extra_count = gap; |
380 int32 seek_adjustment = ((pos - lenb) - (lastpos + lenf)); | 380 int32 seek_adjustment = ((pos - lenb) - (lastpos + lenf)); |
381 | 381 |
382 control_stream.WriteVarint32(copy_count); | 382 control_stream_copy_counts->WriteVarint32(copy_count); |
383 control_stream.WriteVarint32(extra_count); | 383 control_stream_extra_counts->WriteVarint32(extra_count); |
384 control_stream.WriteVarint32Signed(seek_adjustment); | 384 control_stream_seeks->WriteVarint32Signed(seek_adjustment); |
385 ++control_length; | 385 ++control_length; |
386 #ifdef DEBUG_bsmedberg | 386 #ifdef DEBUG_bsmedberg |
387 LOG(INFO) << StringPrintf( | 387 LOG(INFO) << StringPrintf( |
388 "Writing a block: copy: %-8u extra: %-8u seek: %+-9d", | 388 "Writing a block: copy: %-8u extra: %-8u seek: %+-9d", |
389 copy_count, extra_count, seek_adjustment); | 389 copy_count, extra_count, seek_adjustment); |
390 #endif | 390 #endif |
391 | 391 |
392 lastscan = scan - lenb; // Include the backward extension in seed. | 392 lastscan = scan - lenb; // Include the backward extension in seed. |
393 lastpos = pos - lenb; // ditto. | 393 lastpos = pos - lenb; // ditto. |
394 lastoffset = lastpos - lastscan; | 394 lastoffset = lastpos - lastscan; |
395 } | 395 } |
396 } | 396 } |
397 | 397 |
| 398 diff_skips->WriteVarint32(pending_diff_zeros); |
| 399 |
398 I.reset(); | 400 I.reset(); |
399 | 401 |
400 MBSPatchHeader header; | 402 MBSPatchHeader header; |
401 // The string will have a null terminator that we don't use, hence '-1'. | 403 // The string will have a null terminator that we don't use, hence '-1'. |
402 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag), | 404 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag), |
403 MBS_PATCH_HEADER_TAG_must_match_header_field_size); | 405 MBS_PATCH_HEADER_TAG_must_match_header_field_size); |
404 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag)); | 406 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag)); |
405 header.slen = oldsize; | 407 header.slen = oldsize; |
406 header.scrc32 = CalculateCrc(old, oldsize); | 408 header.scrc32 = CalculateCrc(old, oldsize); |
407 header.dlen = newsize; | 409 header.dlen = newsize; |
408 header.cblen = control_stream.Length(); | |
409 header.difflen = diff_bytes_length; | |
410 header.extralen = extra_bytes_length; | |
411 | 410 |
412 WriteHeader(patch_stream, &header); | 411 WriteHeader(patch_stream, &header); |
413 | 412 |
414 patch_stream->Append(&control_stream); | 413 size_t diff_skips_length = diff_skips->Length(); |
415 patch_stream->Write(diff_bytes, diff_bytes_length); | 414 patch_streams.CopyTo(patch_stream); |
416 patch_stream->Write(extra_bytes, extra_bytes_length); | |
417 | 415 |
418 LOG(INFO) << "Control tuples: " << control_length | 416 LOG(INFO) << "Control tuples: " << control_length |
419 << " copy bytes: " << diff_bytes_length | 417 << " copy bytes: " << diff_bytes_length |
420 << " mistakes: " << diff_bytes_nonzero | 418 << " mistakes: " << diff_bytes_nonzero |
| 419 << " (skips: " << diff_skips_length << ")" |
421 << " extra bytes: " << extra_bytes_length; | 420 << " extra bytes: " << extra_bytes_length; |
422 | 421 |
423 LOG(INFO) << "Uncompressed bsdiff patch size " | 422 LOG(INFO) << "Uncompressed bsdiff patch size " |
424 << patch_stream->Length() - initial_patch_stream_length; | 423 << patch_stream->Length() - initial_patch_stream_length; |
425 | 424 |
426 LOG(INFO) << "End bsdiff " | 425 LOG(INFO) << "End bsdiff " |
427 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); | 426 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); |
428 | 427 |
429 return OK; | 428 return OK; |
430 } | 429 } |
431 | 430 |
432 } // namespace | 431 } // namespace |
OLD | NEW |