| OLD | NEW |
| 1 /* | 1 /* |
| 2 bsdiff.c -- Binary patch generator. | 2 bsdiff.c -- Binary patch generator. |
| 3 | 3 |
| 4 Copyright 2003 Colin Percival | 4 Copyright 2003 Colin Percival |
| 5 | 5 |
| 6 For the terms under which this work may be distributed, please see | 6 For the terms under which this work may be distributed, please see |
| 7 the adjoining file "LICENSE". | 7 the adjoining file "LICENSE". |
| 8 | 8 |
| 9 ChangeLog: | 9 ChangeLog: |
| 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit | 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit |
| 11 values throughout. | 11 values throughout. |
| 12 --Benjamin Smedberg <benjamin@smedbergs.us> | 12 --Benjamin Smedberg <benjamin@smedbergs.us> |
| 13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table | 13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table |
| 14 provided by libbz2. | 14 provided by libbz2. |
| 15 --Darin Fisher <darin@meer.net> | 15 --Darin Fisher <darin@meer.net> |
| 16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library | 16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library |
| 17 --Rahul Kuchhal | 17 --Rahul Kuchhal |
| 18 2009-03-31 - Change to use Streams. Added lots of comments. | 18 2009-03-31 - Change to use Streams. Added lots of comments. |
| 19 --Stephen Adams <sra@chromium.org> | 19 --Stephen Adams <sra@chromium.org> |
| 20 2010-05-26 - Use a paged array for V and I. The address space may be too |
| 21 fragmented for these big arrays to be contiguous. |
| 22 --Stephen Adams <sra@chromium.org> |
| 20 */ | 23 */ |
| 21 | 24 |
| 22 #include "courgette/third_party/bsdiff.h" | 25 #include "courgette/third_party/bsdiff.h" |
| 23 | 26 |
| 24 #include <stdlib.h> | 27 #include <stdlib.h> |
| 25 #include <algorithm> | 28 #include <algorithm> |
| 26 | 29 |
| 27 #include "base/logging.h" | 30 #include "base/logging.h" |
| 28 #include "base/scoped_ptr.h" | 31 #include "base/scoped_ptr.h" |
| 29 #include "base/string_util.h" | 32 #include "base/string_util.h" |
| 30 #include "base/time.h" | 33 #include "base/time.h" |
| 31 | 34 |
| 32 #include "courgette/crc.h" | 35 #include "courgette/crc.h" |
| 33 #include "courgette/streams.h" | 36 #include "courgette/streams.h" |
| 37 #include "courgette/third_party/paged_array.h" |
| 34 | 38 |
| 35 namespace courgette { | 39 namespace courgette { |
| 36 | 40 |
| 37 // ------------------------------------------------------------------------ | 41 // ------------------------------------------------------------------------ |
| 38 // | 42 // |
| 39 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the | 43 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the |
| 40 // code formatting and variable names. The only changes from the original are | 44 // code formatting and variable names. The changes from the original are (1) |
| 41 // replacing tabs with spaces, indentation, and using 'const'. | 45 // replacing tabs with spaces, (2) indentation, (3) using 'const', and (4) |
| 46 // changing the V and I parameters from int* to PagedArray<int>&. |
| 42 // | 47 // |
| 43 // The code appears to be a rewritten version of the suffix array algorithm | 48 // The code appears to be a rewritten version of the suffix array algorithm |
| 44 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko | 49 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko |
| 45 // Sadakane, special cased for bytes. | 50 // Sadakane, special cased for bytes. |
| 46 | 51 |
| 47 static void | 52 static void |
| 48 split(int *I,int *V,int start,int len,int h) | 53 split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h) |
| 49 { | 54 { |
| 50 int i,j,k,x,tmp,jj,kk; | 55 int i,j,k,x,tmp,jj,kk; |
| 51 | 56 |
| 52 if(len<16) { | 57 if(len<16) { |
| 53 for(k=start;k<start+len;k+=j) { | 58 for(k=start;k<start+len;k+=j) { |
| 54 j=1;x=V[I[k]+h]; | 59 j=1;x=V[I[k]+h]; |
| 55 for(i=1;k+i<start+len;i++) { | 60 for(i=1;k+i<start+len;i++) { |
| 56 if(V[I[k+i]+h]<x) { | 61 if(V[I[k+i]+h]<x) { |
| 57 x=V[I[k+i]+h]; | 62 x=V[I[k+i]+h]; |
| 58 j=0; | 63 j=0; |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 100 | 105 |
| 101 if(jj>start) split(I,V,start,jj-start,h); | 106 if(jj>start) split(I,V,start,jj-start,h); |
| 102 | 107 |
| 103 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; | 108 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; |
| 104 if(jj==kk-1) I[jj]=-1; | 109 if(jj==kk-1) I[jj]=-1; |
| 105 | 110 |
| 106 if(start+len>kk) split(I,V,kk,start+len-kk,h); | 111 if(start+len>kk) split(I,V,kk,start+len-kk,h); |
| 107 } | 112 } |
| 108 | 113 |
| 109 static void | 114 static void |
| 110 qsufsort(int *I,int *V,const unsigned char *old,int oldsize) | 115 qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int old
size) |
| 111 { | 116 { |
| 112 int buckets[256]; | 117 int buckets[256]; |
| 113 int i,h,len; | 118 int i,h,len; |
| 114 | 119 |
| 115 for(i=0;i<256;i++) buckets[i]=0; | 120 for(i=0;i<256;i++) buckets[i]=0; |
| 116 for(i=0;i<oldsize;i++) buckets[old[i]]++; | 121 for(i=0;i<oldsize;i++) buckets[old[i]]++; |
| 117 for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; | 122 for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; |
| 118 for(i=255;i>0;i--) buckets[i]=buckets[i-1]; | 123 for(i=255;i>0;i--) buckets[i]=buckets[i-1]; |
| 119 buckets[0]=0; | 124 buckets[0]=0; |
| 120 | 125 |
| (...skipping 29 matching lines...) Expand all Loading... |
| 150 { | 155 { |
| 151 int i; | 156 int i; |
| 152 | 157 |
| 153 for(i=0;(i<oldsize)&&(i<newsize);i++) | 158 for(i=0;(i<oldsize)&&(i<newsize);i++) |
| 154 if(old[i]!=newbuf[i]) break; | 159 if(old[i]!=newbuf[i]) break; |
| 155 | 160 |
| 156 return i; | 161 return i; |
| 157 } | 162 } |
| 158 | 163 |
| 159 static int | 164 static int |
| 160 search(int *I,const unsigned char *old,int oldsize, | 165 search(PagedArray<int>& I,const unsigned char *old,int oldsize, |
| 161 const unsigned char *newbuf,int newsize,int st,int en,int *pos) | 166 const unsigned char *newbuf,int newsize,int st,int en,int *pos) |
| 162 { | 167 { |
| 163 int x,y; | 168 int x,y; |
| 164 | 169 |
| 165 if(en-st<2) { | 170 if(en-st<2) { |
| 166 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); | 171 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); |
| 167 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); | 172 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); |
| 168 | 173 |
| 169 if(x>y) { | 174 if(x>y) { |
| 170 *pos=I[st]; | 175 *pos=I[st]; |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 207 SinkStream* control_stream_seeks = patch_streams.stream(2); | 212 SinkStream* control_stream_seeks = patch_streams.stream(2); |
| 208 SinkStream* diff_skips = patch_streams.stream(3); | 213 SinkStream* diff_skips = patch_streams.stream(3); |
| 209 SinkStream* diff_bytes = patch_streams.stream(4); | 214 SinkStream* diff_bytes = patch_streams.stream(4); |
| 210 SinkStream* extra_bytes = patch_streams.stream(5); | 215 SinkStream* extra_bytes = patch_streams.stream(5); |
| 211 | 216 |
| 212 const uint8* old = old_stream->Buffer(); | 217 const uint8* old = old_stream->Buffer(); |
| 213 const int oldsize = old_stream->Remaining(); | 218 const int oldsize = old_stream->Remaining(); |
| 214 | 219 |
| 215 uint32 pending_diff_zeros = 0; | 220 uint32 pending_diff_zeros = 0; |
| 216 | 221 |
| 217 scoped_array<int> I(new(std::nothrow) int[oldsize + 1]); | 222 PagedArray<int> I; |
| 218 scoped_array<int> V(new(std::nothrow) int[oldsize + 1]); | 223 PagedArray<int> V; |
| 219 if (I == NULL || V == NULL) | 224 |
| 225 if (!I.Allocate(oldsize + 1)) { |
| 226 LOG(ERROR) << "Could not allocate I[], " << ((oldsize + 1) * sizeof(int)) |
| 227 << " bytes"; |
| 220 return MEM_ERROR; | 228 return MEM_ERROR; |
| 229 } |
| 230 |
| 231 if (!V.Allocate(oldsize + 1)) { |
| 232 LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int)) |
| 233 << " bytes"; |
| 234 return MEM_ERROR; |
| 235 } |
| 221 | 236 |
| 222 base::Time q_start_time = base::Time::Now(); | 237 base::Time q_start_time = base::Time::Now(); |
| 223 qsufsort(I.get(), V.get(), old, oldsize); | 238 qsufsort(I, V, old, oldsize); |
| 224 LOG(INFO) << " done qsufsort " | 239 LOG(INFO) << " done qsufsort " |
| 225 << (base::Time::Now() - q_start_time).InSecondsF(); | 240 << (base::Time::Now() - q_start_time).InSecondsF(); |
| 226 V.reset(); | 241 V.clear(); |
| 227 | 242 |
| 228 const uint8* newbuf = new_stream->Buffer(); | 243 const uint8* newbuf = new_stream->Buffer(); |
| 229 const int newsize = new_stream->Remaining(); | 244 const int newsize = new_stream->Remaining(); |
| 230 | 245 |
| 231 int control_length = 0; | 246 int control_length = 0; |
| 232 int diff_bytes_length = 0; | 247 int diff_bytes_length = 0; |
| 233 int diff_bytes_nonzero = 0; | 248 int diff_bytes_nonzero = 0; |
| 234 int extra_bytes_length = 0; | 249 int extra_bytes_length = 0; |
| 235 | 250 |
| 236 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is | 251 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 277 int scan = 0; | 292 int scan = 0; |
| 278 int match_length = 0; | 293 int match_length = 0; |
| 279 | 294 |
| 280 while (scan < newsize) { | 295 while (scan < newsize) { |
| 281 int pos = 0; | 296 int pos = 0; |
| 282 int oldscore = 0; // Count of how many bytes of the current match at |scan| | 297 int oldscore = 0; // Count of how many bytes of the current match at |scan| |
| 283 // extend the match at |lastscan|. | 298 // extend the match at |lastscan|. |
| 284 | 299 |
| 285 scan += match_length; | 300 scan += match_length; |
| 286 for (int scsc = scan; scan < newsize; ++scan) { | 301 for (int scsc = scan; scan < newsize; ++scan) { |
| 287 match_length = search(I.get(), old, oldsize, | 302 match_length = search(I, old, oldsize, |
| 288 newbuf + scan, newsize - scan, | 303 newbuf + scan, newsize - scan, |
| 289 0, oldsize, &pos); | 304 0, oldsize, &pos); |
| 290 | 305 |
| 291 for ( ; scsc < scan + match_length ; scsc++) | 306 for ( ; scsc < scan + match_length ; scsc++) |
| 292 if ((scsc + lastoffset < oldsize) && | 307 if ((scsc + lastoffset < oldsize) && |
| 293 (old[scsc + lastoffset] == newbuf[scsc])) | 308 (old[scsc + lastoffset] == newbuf[scsc])) |
| 294 oldscore++; | 309 oldscore++; |
| 295 | 310 |
| 296 if ((match_length == oldscore) && (match_length != 0)) | 311 if ((match_length == oldscore) && (match_length != 0)) |
| 297 break; // Good continuing match, case (1) | 312 break; // Good continuing match, case (1) |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 389 #endif | 404 #endif |
| 390 | 405 |
| 391 lastscan = scan - lenb; // Include the backward extension in seed. | 406 lastscan = scan - lenb; // Include the backward extension in seed. |
| 392 lastpos = pos - lenb; // ditto. | 407 lastpos = pos - lenb; // ditto. |
| 393 lastoffset = lastpos - lastscan; | 408 lastoffset = lastpos - lastscan; |
| 394 } | 409 } |
| 395 } | 410 } |
| 396 | 411 |
| 397 diff_skips->WriteVarint32(pending_diff_zeros); | 412 diff_skips->WriteVarint32(pending_diff_zeros); |
| 398 | 413 |
| 399 I.reset(); | 414 I.clear(); |
| 400 | 415 |
| 401 MBSPatchHeader header; | 416 MBSPatchHeader header; |
| 402 // The string will have a null terminator that we don't use, hence '-1'. | 417 // The string will have a null terminator that we don't use, hence '-1'. |
| 403 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag), | 418 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag), |
| 404 MBS_PATCH_HEADER_TAG_must_match_header_field_size); | 419 MBS_PATCH_HEADER_TAG_must_match_header_field_size); |
| 405 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag)); | 420 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag)); |
| 406 header.slen = oldsize; | 421 header.slen = oldsize; |
| 407 header.scrc32 = CalculateCrc(old, oldsize); | 422 header.scrc32 = CalculateCrc(old, oldsize); |
| 408 header.dlen = newsize; | 423 header.dlen = newsize; |
| 409 | 424 |
| (...skipping 11 matching lines...) Expand all Loading... |
| 421 LOG(INFO) << "Uncompressed bsdiff patch size " | 436 LOG(INFO) << "Uncompressed bsdiff patch size " |
| 422 << patch_stream->Length() - initial_patch_stream_length; | 437 << patch_stream->Length() - initial_patch_stream_length; |
| 423 | 438 |
| 424 LOG(INFO) << "End bsdiff " | 439 LOG(INFO) << "End bsdiff " |
| 425 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); | 440 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); |
| 426 | 441 |
| 427 return OK; | 442 return OK; |
| 428 } | 443 } |
| 429 | 444 |
| 430 } // namespace | 445 } // namespace |
| OLD | NEW |