| OLD | NEW |
| 1 /* | 1 /* |
| 2 bsdiff.c -- Binary patch generator. | 2 bsdiff.c -- Binary patch generator. |
| 3 | 3 |
| 4 Copyright 2003 Colin Percival | 4 Copyright 2003 Colin Percival |
| 5 | 5 |
| 6 For the terms under which this work may be distributed, please see | 6 For the terms under which this work may be distributed, please see |
| 7 the adjoining file "LICENSE". | 7 the adjoining file "LICENSE". |
| 8 | 8 |
| 9 ChangeLog: | 9 ChangeLog: |
| 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit | 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit |
| (...skipping 24 matching lines...) Expand all Loading... |
| 35 namespace courgette { | 35 namespace courgette { |
| 36 | 36 |
| 37 // ------------------------------------------------------------------------ | 37 // ------------------------------------------------------------------------ |
| 38 // | 38 // |
| 39 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the | 39 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the |
| 40 // code formatting and variable names. The only changes from the original are | 40 // code formatting and variable names. The only changes from the original are |
| 41 // replacing tabs with spaces, indentation, and using 'const'. | 41 // replacing tabs with spaces, indentation, and using 'const'. |
| 42 // | 42 // |
| 43 // The code appears to be a rewritten version of the suffix array algorithm | 43 // The code appears to be a rewritten version of the suffix array algorithm |
| 44 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko | 44 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko |
| 45 // Sadakane, special-cased for bytes. | 45 // Sadakane, special cased for bytes. |
| 46 | 46 |
| 47 static void | 47 static void |
| 48 split(int *I,int *V,int start,int len,int h) | 48 split(int *I,int *V,int start,int len,int h) |
| 49 { | 49 { |
| 50 int i,j,k,x,tmp,jj,kk; | 50 int i,j,k,x,tmp,jj,kk; |
| 51 | 51 |
| 52 if(len<16) { | 52 if(len<16) { |
| 53 for(k=start;k<start+len;k+=j) { | 53 for(k=start;k<start+len;k+=j) { |
| 54 j=1;x=V[I[k]+h]; | 54 j=1;x=V[I[k]+h]; |
| 55 for(i=1;k+i<start+len;i++) { | 55 for(i=1;k+i<start+len;i++) { |
| (...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 184 } | 184 } |
| 185 | 185 |
| 186 // End of 'verbatim' code. | 186 // End of 'verbatim' code. |
| 187 // ------------------------------------------------------------------------ | 187 // ------------------------------------------------------------------------ |
| 188 | 188 |
| 189 static void WriteHeader(SinkStream* stream, MBSPatchHeader* header) { | 189 static void WriteHeader(SinkStream* stream, MBSPatchHeader* header) { |
| 190 stream->Write(header->tag, sizeof(header->tag)); | 190 stream->Write(header->tag, sizeof(header->tag)); |
| 191 stream->WriteVarint32(header->slen); | 191 stream->WriteVarint32(header->slen); |
| 192 stream->WriteVarint32(header->scrc32); | 192 stream->WriteVarint32(header->scrc32); |
| 193 stream->WriteVarint32(header->dlen); | 193 stream->WriteVarint32(header->dlen); |
| 194 stream->WriteVarint32(header->cblen); | |
| 195 stream->WriteVarint32(header->difflen); | |
| 196 stream->WriteVarint32(header->extralen); | |
| 197 } | 194 } |
| 198 | 195 |
| 199 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, | 196 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, |
| 200 SourceStream* new_stream, | 197 SourceStream* new_stream, |
| 201 SinkStream* patch_stream) | 198 SinkStream* patch_stream) |
| 202 { | 199 { |
| 203 base::Time start_bsdiff_time = base::Time::Now(); | 200 base::Time start_bsdiff_time = base::Time::Now(); |
| 204 LOG(INFO) << "Start bsdiff"; | 201 LOG(INFO) << "Start bsdiff"; |
| 205 size_t initial_patch_stream_length = patch_stream->Length(); | 202 size_t initial_patch_stream_length = patch_stream->Length(); |
| 206 | 203 |
| 204 SinkStreamSet patch_streams; |
| 205 SinkStream* control_stream_copy_counts = patch_streams.stream(0); |
| 206 SinkStream* control_stream_extra_counts = patch_streams.stream(1); |
| 207 SinkStream* control_stream_seeks = patch_streams.stream(2); |
| 208 SinkStream* diff_skips = patch_streams.stream(3); |
| 209 SinkStream* diff_bytes = patch_streams.stream(4); |
| 210 SinkStream* extra_bytes = patch_streams.stream(5); |
| 211 |
| 207 const uint8* old = old_stream->Buffer(); | 212 const uint8* old = old_stream->Buffer(); |
| 208 const int oldsize = old_stream->Remaining(); | 213 const int oldsize = old_stream->Remaining(); |
| 209 | 214 |
| 215 uint32 pending_diff_zeros = 0; |
| 216 |
| 210 scoped_array<int> I(new(std::nothrow) int[oldsize + 1]); | 217 scoped_array<int> I(new(std::nothrow) int[oldsize + 1]); |
| 211 scoped_array<int> V(new(std::nothrow) int[oldsize + 1]); | 218 scoped_array<int> V(new(std::nothrow) int[oldsize + 1]); |
| 212 if (I == NULL || V == NULL) | 219 if (I == NULL || V == NULL) |
| 213 return MEM_ERROR; | 220 return MEM_ERROR; |
| 214 | 221 |
| 215 base::Time q_start_time = base::Time::Now(); | 222 base::Time q_start_time = base::Time::Now(); |
| 216 qsufsort(I.get(), V.get(), old, oldsize); | 223 qsufsort(I.get(), V.get(), old, oldsize); |
| 217 LOG(INFO) << " done qsufsort " | 224 LOG(INFO) << " done qsufsort " |
| 218 << (base::Time::Now() - q_start_time).InSecondsF(); | 225 << (base::Time::Now() - q_start_time).InSecondsF(); |
| 219 V.reset(); | 226 V.reset(); |
| 220 | 227 |
| 221 const uint8* newbuf = new_stream->Buffer(); | 228 const uint8* newbuf = new_stream->Buffer(); |
| 222 const int newsize = new_stream->Remaining(); | 229 const int newsize = new_stream->Remaining(); |
| 223 | 230 |
| 224 // Allocate newsize+1 bytes instead of newsize bytes to ensure that we never | |
| 225 // try to malloc(0) and get a NULL pointer. | |
| 226 | |
| 227 scoped_array<uint8> diff_bytes_array(new(std::nothrow) uint8[newsize + 1]); | |
| 228 scoped_array<uint8> extra_bytes_array(new(std::nothrow) uint8[newsize + 1]); | |
| 229 if (diff_bytes_array == NULL || extra_bytes_array == NULL) | |
| 230 return MEM_ERROR; | |
| 231 | |
| 232 uint8* diff_bytes = diff_bytes_array.get(); | |
| 233 uint8* extra_bytes = extra_bytes_array.get(); | |
| 234 int control_length = 0; | 231 int control_length = 0; |
| 235 int diff_bytes_length = 0; | 232 int diff_bytes_length = 0; |
| 236 int diff_bytes_nonzero = 0; | 233 int diff_bytes_nonzero = 0; |
| 237 int extra_bytes_length = 0; | 234 int extra_bytes_length = 0; |
| 238 int eblen = 0; | 235 int eblen = 0; |
| 239 | 236 |
| 240 SinkStream control_stream; | |
| 241 | |
| 242 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is | 237 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is |
| 243 // the number of bytes to copy from the old file (possibly with mistakes), | 238 // the number of bytes to copy from the old file (possibly with mistakes), |
| 244 // 'extra' is the number of bytes to copy from a stream of fresh bytes, and | 239 // 'extra' is the number of bytes to copy from a stream of fresh bytes, and |
| 245 // 'seek' is an offset to move to the position to copy for the next triple. | 240 // 'seek' is an offset to move to the position to copy for the next triple. |
| 246 // | 241 // |
| 247 // The invariant at the top of this loop is that we are committed to emitting | 242 // The invariant at the top of this loop is that we are committed to emitting |
| 248 // a triple for the part of |newbuf| surrounding a 'seed' match near | 243 // a triple for the part of |newbuf| surrounding a 'seed' match near |
| 249 // |lastscan|. We are searching for a second match that will be the 'seed' of | 244 // |lastscan|. We are searching for a second match that will be the 'seed' of |
| 250 // the next triple. As we scan through |newbuf|, one of four things can | 245 // the next triple. As we scan through |newbuf|, one of four things can |
| 251 // happen at the current position |scan|: | 246 // happen at the current position |scan|: |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 357 if (newbuf[scan - lenb + i] == old[pos - lenb + i]) score--; | 352 if (newbuf[scan - lenb + i] == old[pos - lenb + i]) score--; |
| 358 if (score > Ss) { Ss = score; lens = i + 1; } | 353 if (score > Ss) { Ss = score; lens = i + 1; } |
| 359 } | 354 } |
| 360 | 355 |
| 361 lenf += lens - overlap; | 356 lenf += lens - overlap; |
| 362 lenb -= lens; | 357 lenb -= lens; |
| 363 }; | 358 }; |
| 364 | 359 |
| 365 for (int i = 0; i < lenf; i++) { | 360 for (int i = 0; i < lenf; i++) { |
| 366 uint8 diff_byte = newbuf[lastscan + i] - old[lastpos + i]; | 361 uint8 diff_byte = newbuf[lastscan + i] - old[lastpos + i]; |
| 367 diff_bytes[diff_bytes_length + i] = diff_byte; | 362 if (diff_byte) { |
| 368 if (diff_byte) | |
| 369 ++diff_bytes_nonzero; | 363 ++diff_bytes_nonzero; |
| 364 diff_skips->WriteVarint32(pending_diff_zeros); |
| 365 pending_diff_zeros = 0; |
| 366 diff_bytes->Write(&diff_byte, 1); |
| 367 } else { |
| 368 ++pending_diff_zeros; |
| 369 } |
| 370 } | 370 } |
| 371 int gap = (scan - lenb) - (lastscan + lenf); | 371 int gap = (scan - lenb) - (lastscan + lenf); |
| 372 for (int i = 0; i < gap; i++) | 372 for (int i = 0; i < gap; i++) |
| 373 extra_bytes[extra_bytes_length + i] = newbuf[lastscan + lenf + i]; | 373 extra_bytes->Write(&newbuf[lastscan + lenf + i], 1); |
| 374 | 374 |
| 375 diff_bytes_length += lenf; | 375 diff_bytes_length += lenf; |
| 376 extra_bytes_length += gap; | 376 extra_bytes_length += gap; |
| 377 | 377 |
| 378 uint32 copy_count = lenf; | 378 uint32 copy_count = lenf; |
| 379 uint32 extra_count = gap; | 379 uint32 extra_count = gap; |
| 380 int32 seek_adjustment = ((pos - lenb) - (lastpos + lenf)); | 380 int32 seek_adjustment = ((pos - lenb) - (lastpos + lenf)); |
| 381 | 381 |
| 382 control_stream.WriteVarint32(copy_count); | 382 control_stream_copy_counts->WriteVarint32(copy_count); |
| 383 control_stream.WriteVarint32(extra_count); | 383 control_stream_extra_counts->WriteVarint32(extra_count); |
| 384 control_stream.WriteVarint32Signed(seek_adjustment); | 384 control_stream_seeks->WriteVarint32Signed(seek_adjustment); |
| 385 ++control_length; | 385 ++control_length; |
| 386 #ifdef DEBUG_bsmedberg | 386 #ifdef DEBUG_bsmedberg |
| 387 LOG(INFO) << StringPrintf( | 387 LOG(INFO) << StringPrintf( |
| 388 "Writing a block: copy: %-8u extra: %-8u seek: %+-9d", | 388 "Writing a block: copy: %-8u extra: %-8u seek: %+-9d", |
| 389 copy_count, extra_count, seek_adjustment); | 389 copy_count, extra_count, seek_adjustment); |
| 390 #endif | 390 #endif |
| 391 | 391 |
| 392 lastscan = scan - lenb; // Include the backward extension in seed. | 392 lastscan = scan - lenb; // Include the backward extension in seed. |
| 393 lastpos = pos - lenb; // ditto. | 393 lastpos = pos - lenb; // ditto. |
| 394 lastoffset = lastpos - lastscan; | 394 lastoffset = lastpos - lastscan; |
| 395 } | 395 } |
| 396 } | 396 } |
| 397 | 397 |
| 398 diff_skips->WriteVarint32(pending_diff_zeros); |
| 399 |
| 398 I.reset(); | 400 I.reset(); |
| 399 | 401 |
| 400 MBSPatchHeader header; | 402 MBSPatchHeader header; |
| 401 // The string will have a null terminator that we don't use, hence '-1'. | 403 // The string will have a null terminator that we don't use, hence '-1'. |
| 402 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag), | 404 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag), |
| 403 MBS_PATCH_HEADER_TAG_must_match_header_field_size); | 405 MBS_PATCH_HEADER_TAG_must_match_header_field_size); |
| 404 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag)); | 406 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag)); |
| 405 header.slen = oldsize; | 407 header.slen = oldsize; |
| 406 header.scrc32 = CalculateCrc(old, oldsize); | 408 header.scrc32 = CalculateCrc(old, oldsize); |
| 407 header.dlen = newsize; | 409 header.dlen = newsize; |
| 408 header.cblen = control_stream.Length(); | |
| 409 header.difflen = diff_bytes_length; | |
| 410 header.extralen = extra_bytes_length; | |
| 411 | 410 |
| 412 WriteHeader(patch_stream, &header); | 411 WriteHeader(patch_stream, &header); |
| 413 | 412 |
| 414 patch_stream->Append(&control_stream); | 413 size_t diff_skips_length = diff_skips->Length(); |
| 415 patch_stream->Write(diff_bytes, diff_bytes_length); | 414 patch_streams.CopyTo(patch_stream); |
| 416 patch_stream->Write(extra_bytes, extra_bytes_length); | |
| 417 | 415 |
| 418 LOG(INFO) << "Control tuples: " << control_length | 416 LOG(INFO) << "Control tuples: " << control_length |
| 419 << " copy bytes: " << diff_bytes_length | 417 << " copy bytes: " << diff_bytes_length |
| 420 << " mistakes: " << diff_bytes_nonzero | 418 << " mistakes: " << diff_bytes_nonzero |
| 419 << " (skips: " << diff_skips_length << ")" |
| 421 << " extra bytes: " << extra_bytes_length; | 420 << " extra bytes: " << extra_bytes_length; |
| 422 | 421 |
| 423 LOG(INFO) << "Uncompressed bsdiff patch size " | 422 LOG(INFO) << "Uncompressed bsdiff patch size " |
| 424 << patch_stream->Length() - initial_patch_stream_length; | 423 << patch_stream->Length() - initial_patch_stream_length; |
| 425 | 424 |
| 426 LOG(INFO) << "End bsdiff " | 425 LOG(INFO) << "End bsdiff " |
| 427 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); | 426 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); |
| 428 | 427 |
| 429 return OK; | 428 return OK; |
| 430 } | 429 } |
| 431 | 430 |
| 432 } // namespace | 431 } // namespace |
| OLD | NEW |