OLD | NEW |
1 /* | 1 /* |
2 bsdiff.c -- Binary patch generator. | 2 bsdiff.c -- Binary patch generator. |
3 | 3 |
4 Copyright 2003 Colin Percival | 4 Copyright 2003 Colin Percival |
5 | 5 |
6 For the terms under which this work may be distributed, please see | 6 For the terms under which this work may be distributed, please see |
7 the adjoining file "LICENSE". | 7 the adjoining file "LICENSE". |
8 | 8 |
9 ChangeLog: | 9 ChangeLog: |
10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit | 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit |
11 values throughout. | 11 values throughout. |
12 --Benjamin Smedberg <benjamin@smedbergs.us> | 12 --Benjamin Smedberg <benjamin@smedbergs.us> |
13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table | 13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table |
14 provided by libbz2. | 14 provided by libbz2. |
15 --Darin Fisher <darin@meer.net> | 15 --Darin Fisher <darin@meer.net> |
16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library | 16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library |
17 --Rahul Kuchhal | 17 --Rahul Kuchhal |
18 2009-03-31 - Change to use Streams. Added lots of comments. | 18 2009-03-31 - Change to use Streams. Added lots of comments. |
19 --Stephen Adams <sra@chromium.org> | 19 --Stephen Adams <sra@chromium.org> |
| 20 2010-05-26 - Use a paged array for V and I. The address space may be too |
| 21 fragmented for these big arrays to be contiguous. |
| 22 --Stephen Adams <sra@chromium.org> |
20 */ | 23 */ |
21 | 24 |
22 #include "courgette/third_party/bsdiff.h" | 25 #include "courgette/third_party/bsdiff.h" |
23 | 26 |
24 #include <stdlib.h> | 27 #include <stdlib.h> |
25 #include <algorithm> | 28 #include <algorithm> |
26 | 29 |
27 #include "base/logging.h" | 30 #include "base/logging.h" |
28 #include "base/scoped_ptr.h" | 31 #include "base/scoped_ptr.h" |
29 #include "base/string_util.h" | 32 #include "base/string_util.h" |
30 #include "base/time.h" | 33 #include "base/time.h" |
31 | 34 |
32 #include "courgette/crc.h" | 35 #include "courgette/crc.h" |
33 #include "courgette/streams.h" | 36 #include "courgette/streams.h" |
| 37 #include "courgette/third_party/paged_array.h" |
34 | 38 |
35 namespace courgette { | 39 namespace courgette { |
36 | 40 |
37 // ------------------------------------------------------------------------ | 41 // ------------------------------------------------------------------------ |
38 // | 42 // |
39 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the | 43 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the |
40 // code formatting and variable names. The only changes from the original are | 44 // code formatting and variable names. The changes from the original are (1) |
41 // replacing tabs with spaces, indentation, and using 'const'. | 45 // replacing tabs with spaces, (2) indentation, (3) using 'const', and (4) |
| 46 // changing the V and I parameters from int* to PagedArray<int>&. |
42 // | 47 // |
43 // The code appears to be a rewritten version of the suffix array algorithm | 48 // The code appears to be a rewritten version of the suffix array algorithm |
44 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko | 49 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko |
45 // Sadakane, special cased for bytes. | 50 // Sadakane, special cased for bytes. |
46 | 51 |
47 static void | 52 static void |
48 split(int *I,int *V,int start,int len,int h) | 53 split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h) |
49 { | 54 { |
50 int i,j,k,x,tmp,jj,kk; | 55 int i,j,k,x,tmp,jj,kk; |
51 | 56 |
52 if(len<16) { | 57 if(len<16) { |
53 for(k=start;k<start+len;k+=j) { | 58 for(k=start;k<start+len;k+=j) { |
54 j=1;x=V[I[k]+h]; | 59 j=1;x=V[I[k]+h]; |
55 for(i=1;k+i<start+len;i++) { | 60 for(i=1;k+i<start+len;i++) { |
56 if(V[I[k+i]+h]<x) { | 61 if(V[I[k+i]+h]<x) { |
57 x=V[I[k+i]+h]; | 62 x=V[I[k+i]+h]; |
58 j=0; | 63 j=0; |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
100 | 105 |
101 if(jj>start) split(I,V,start,jj-start,h); | 106 if(jj>start) split(I,V,start,jj-start,h); |
102 | 107 |
103 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; | 108 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; |
104 if(jj==kk-1) I[jj]=-1; | 109 if(jj==kk-1) I[jj]=-1; |
105 | 110 |
106 if(start+len>kk) split(I,V,kk,start+len-kk,h); | 111 if(start+len>kk) split(I,V,kk,start+len-kk,h); |
107 } | 112 } |
108 | 113 |
109 static void | 114 static void |
110 qsufsort(int *I,int *V,const unsigned char *old,int oldsize) | 115 qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int old
size) |
111 { | 116 { |
112 int buckets[256]; | 117 int buckets[256]; |
113 int i,h,len; | 118 int i,h,len; |
114 | 119 |
115 for(i=0;i<256;i++) buckets[i]=0; | 120 for(i=0;i<256;i++) buckets[i]=0; |
116 for(i=0;i<oldsize;i++) buckets[old[i]]++; | 121 for(i=0;i<oldsize;i++) buckets[old[i]]++; |
117 for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; | 122 for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; |
118 for(i=255;i>0;i--) buckets[i]=buckets[i-1]; | 123 for(i=255;i>0;i--) buckets[i]=buckets[i-1]; |
119 buckets[0]=0; | 124 buckets[0]=0; |
120 | 125 |
(...skipping 29 matching lines...) Expand all Loading... |
150 { | 155 { |
151 int i; | 156 int i; |
152 | 157 |
153 for(i=0;(i<oldsize)&&(i<newsize);i++) | 158 for(i=0;(i<oldsize)&&(i<newsize);i++) |
154 if(old[i]!=newbuf[i]) break; | 159 if(old[i]!=newbuf[i]) break; |
155 | 160 |
156 return i; | 161 return i; |
157 } | 162 } |
158 | 163 |
159 static int | 164 static int |
160 search(int *I,const unsigned char *old,int oldsize, | 165 search(PagedArray<int>& I,const unsigned char *old,int oldsize, |
161 const unsigned char *newbuf,int newsize,int st,int en,int *pos) | 166 const unsigned char *newbuf,int newsize,int st,int en,int *pos) |
162 { | 167 { |
163 int x,y; | 168 int x,y; |
164 | 169 |
165 if(en-st<2) { | 170 if(en-st<2) { |
166 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); | 171 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); |
167 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); | 172 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); |
168 | 173 |
169 if(x>y) { | 174 if(x>y) { |
170 *pos=I[st]; | 175 *pos=I[st]; |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
207 SinkStream* control_stream_seeks = patch_streams.stream(2); | 212 SinkStream* control_stream_seeks = patch_streams.stream(2); |
208 SinkStream* diff_skips = patch_streams.stream(3); | 213 SinkStream* diff_skips = patch_streams.stream(3); |
209 SinkStream* diff_bytes = patch_streams.stream(4); | 214 SinkStream* diff_bytes = patch_streams.stream(4); |
210 SinkStream* extra_bytes = patch_streams.stream(5); | 215 SinkStream* extra_bytes = patch_streams.stream(5); |
211 | 216 |
212 const uint8* old = old_stream->Buffer(); | 217 const uint8* old = old_stream->Buffer(); |
213 const int oldsize = old_stream->Remaining(); | 218 const int oldsize = old_stream->Remaining(); |
214 | 219 |
215 uint32 pending_diff_zeros = 0; | 220 uint32 pending_diff_zeros = 0; |
216 | 221 |
217 scoped_array<int> I(new(std::nothrow) int[oldsize + 1]); | 222 PagedArray<int> I; |
218 scoped_array<int> V(new(std::nothrow) int[oldsize + 1]); | 223 PagedArray<int> V; |
219 if (I == NULL || V == NULL) | 224 |
| 225 if (!I.Allocate(oldsize + 1)) { |
| 226 LOG(ERROR) << "Could not allocate I[], " << ((oldsize + 1) * sizeof(int)) |
| 227 << " bytes"; |
220 return MEM_ERROR; | 228 return MEM_ERROR; |
| 229 } |
| 230 |
| 231 if (!V.Allocate(oldsize + 1)) { |
| 232 LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int)) |
| 233 << " bytes"; |
| 234 return MEM_ERROR; |
| 235 } |
221 | 236 |
222 base::Time q_start_time = base::Time::Now(); | 237 base::Time q_start_time = base::Time::Now(); |
223 qsufsort(I.get(), V.get(), old, oldsize); | 238 qsufsort(I, V, old, oldsize); |
224 LOG(INFO) << " done qsufsort " | 239 LOG(INFO) << " done qsufsort " |
225 << (base::Time::Now() - q_start_time).InSecondsF(); | 240 << (base::Time::Now() - q_start_time).InSecondsF(); |
226 V.reset(); | 241 V.clear(); |
227 | 242 |
228 const uint8* newbuf = new_stream->Buffer(); | 243 const uint8* newbuf = new_stream->Buffer(); |
229 const int newsize = new_stream->Remaining(); | 244 const int newsize = new_stream->Remaining(); |
230 | 245 |
231 int control_length = 0; | 246 int control_length = 0; |
232 int diff_bytes_length = 0; | 247 int diff_bytes_length = 0; |
233 int diff_bytes_nonzero = 0; | 248 int diff_bytes_nonzero = 0; |
234 int extra_bytes_length = 0; | 249 int extra_bytes_length = 0; |
235 | 250 |
236 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is | 251 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
277 int scan = 0; | 292 int scan = 0; |
278 int match_length = 0; | 293 int match_length = 0; |
279 | 294 |
280 while (scan < newsize) { | 295 while (scan < newsize) { |
281 int pos = 0; | 296 int pos = 0; |
282 int oldscore = 0; // Count of how many bytes of the current match at |scan| | 297 int oldscore = 0; // Count of how many bytes of the current match at |scan| |
283 // extend the match at |lastscan|. | 298 // extend the match at |lastscan|. |
284 | 299 |
285 scan += match_length; | 300 scan += match_length; |
286 for (int scsc = scan; scan < newsize; ++scan) { | 301 for (int scsc = scan; scan < newsize; ++scan) { |
287 match_length = search(I.get(), old, oldsize, | 302 match_length = search(I, old, oldsize, |
288 newbuf + scan, newsize - scan, | 303 newbuf + scan, newsize - scan, |
289 0, oldsize, &pos); | 304 0, oldsize, &pos); |
290 | 305 |
291 for ( ; scsc < scan + match_length ; scsc++) | 306 for ( ; scsc < scan + match_length ; scsc++) |
292 if ((scsc + lastoffset < oldsize) && | 307 if ((scsc + lastoffset < oldsize) && |
293 (old[scsc + lastoffset] == newbuf[scsc])) | 308 (old[scsc + lastoffset] == newbuf[scsc])) |
294 oldscore++; | 309 oldscore++; |
295 | 310 |
296 if ((match_length == oldscore) && (match_length != 0)) | 311 if ((match_length == oldscore) && (match_length != 0)) |
297 break; // Good continuing match, case (1) | 312 break; // Good continuing match, case (1) |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
389 #endif | 404 #endif |
390 | 405 |
391 lastscan = scan - lenb; // Include the backward extension in seed. | 406 lastscan = scan - lenb; // Include the backward extension in seed. |
392 lastpos = pos - lenb; // ditto. | 407 lastpos = pos - lenb; // ditto. |
393 lastoffset = lastpos - lastscan; | 408 lastoffset = lastpos - lastscan; |
394 } | 409 } |
395 } | 410 } |
396 | 411 |
397 diff_skips->WriteVarint32(pending_diff_zeros); | 412 diff_skips->WriteVarint32(pending_diff_zeros); |
398 | 413 |
399 I.reset(); | 414 I.clear(); |
400 | 415 |
401 MBSPatchHeader header; | 416 MBSPatchHeader header; |
402 // The string will have a null terminator that we don't use, hence '-1'. | 417 // The string will have a null terminator that we don't use, hence '-1'. |
403 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag), | 418 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag), |
404 MBS_PATCH_HEADER_TAG_must_match_header_field_size); | 419 MBS_PATCH_HEADER_TAG_must_match_header_field_size); |
405 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag)); | 420 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag)); |
406 header.slen = oldsize; | 421 header.slen = oldsize; |
407 header.scrc32 = CalculateCrc(old, oldsize); | 422 header.scrc32 = CalculateCrc(old, oldsize); |
408 header.dlen = newsize; | 423 header.dlen = newsize; |
409 | 424 |
(...skipping 11 matching lines...) Expand all Loading... |
421 LOG(INFO) << "Uncompressed bsdiff patch size " | 436 LOG(INFO) << "Uncompressed bsdiff patch size " |
422 << patch_stream->Length() - initial_patch_stream_length; | 437 << patch_stream->Length() - initial_patch_stream_length; |
423 | 438 |
424 LOG(INFO) << "End bsdiff " | 439 LOG(INFO) << "End bsdiff " |
425 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); | 440 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); |
426 | 441 |
427 return OK; | 442 return OK; |
428 } | 443 } |
429 | 444 |
430 } // namespace | 445 } // namespace |
OLD | NEW |