OLD | NEW |
1 /* | 1 /* |
2 bsdiff.c -- Binary patch generator. | 2 bsdiff.c -- Binary patch generator. |
3 | 3 |
4 Copyright 2003 Colin Percival | 4 Copyright 2003 Colin Percival |
5 | 5 |
6 For the terms under which this work may be distributed, please see | 6 For the terms under which this work may be distributed, please see |
7 the adjoining file "LICENSE". | 7 the adjoining file "LICENSE". |
8 | 8 |
9 ChangeLog: | 9 ChangeLog: |
10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit | 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit |
11 values throughout. | 11 values throughout. |
12 --Benjamin Smedberg <benjamin@smedbergs.us> | 12 --Benjamin Smedberg <benjamin@smedbergs.us> |
13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table | 13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table |
14 provided by libbz2. | 14 provided by libbz2. |
15 --Darin Fisher <darin@meer.net> | 15 --Darin Fisher <darin@meer.net> |
16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library | 16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library |
17 --Rahul Kuchhal | 17 --Rahul Kuchhal |
18 2009-03-31 - Change to use Streams. Added lots of comments. | 18 2009-03-31 - Change to use Streams. Added lots of comments. |
19 --Stephen Adams <sra@chromium.org> | 19 --Stephen Adams <sra@chromium.org> |
20 2010-05-26 - Use a paged array for V and I. The address space may be too | 20 2010-05-26 - Use a paged array for V and I. The address space may be too |
21 fragmented for these big arrays to be contiguous. | 21 fragmented for these big arrays to be contiguous. |
22 --Stephen Adams <sra@chromium.org> | 22 --Stephen Adams <sra@chromium.org> |
| 23 2015-08-03 - Extract qsufsort portion to a separate file. |
| 24 --Samuel Huang <huangs@chromium.org> |
23 */ | 25 */ |
24 | 26 |
25 #include "courgette/third_party/bsdiff.h" | 27 #include "courgette/third_party/bsdiff.h" |
26 | 28 |
27 #include <stdlib.h> | 29 #include <stdlib.h> |
28 #include <algorithm> | 30 #include <algorithm> |
29 | 31 |
30 #include "base/logging.h" | 32 #include "base/logging.h" |
31 #include "base/memory/scoped_ptr.h" | 33 #include "base/memory/scoped_ptr.h" |
32 #include "base/strings/string_util.h" | 34 #include "base/strings/string_util.h" |
33 #include "base/time/time.h" | 35 #include "base/time/time.h" |
34 | 36 |
35 #include "courgette/crc.h" | 37 #include "courgette/crc.h" |
36 #include "courgette/streams.h" | 38 #include "courgette/streams.h" |
37 #include "courgette/third_party/paged_array.h" | 39 #include "courgette/third_party/paged_array.h" |
| 40 #include "courgette/third_party/qsufsort.h" |
38 | 41 |
39 namespace courgette { | 42 namespace courgette { |
40 | 43 |
41 // ------------------------------------------------------------------------ | |
42 // | |
43 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the | |
44 // code formatting and variable names. The changes from the original are (1) | |
45 // replacing tabs with spaces, (2) indentation, (3) using 'const', and (4) | |
46 // changing the V and I parameters from int* to PagedArray<int>&. | |
47 // | |
48 // The code appears to be a rewritten version of the suffix array algorithm | |
49 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko | |
50 // Sadakane, special cased for bytes. | |
51 | |
52 static void | |
53 split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h) | |
54 { | |
55 int i,j,k,x,tmp,jj,kk; | |
56 | |
57 if(len<16) { | |
58 for(k=start;k<start+len;k+=j) { | |
59 j=1;x=V[I[k]+h]; | |
60 for(i=1;k+i<start+len;i++) { | |
61 if(V[I[k+i]+h]<x) { | |
62 x=V[I[k+i]+h]; | |
63 j=0; | |
64 }; | |
65 if(V[I[k+i]+h]==x) { | |
66 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; | |
67 j++; | |
68 }; | |
69 }; | |
70 for(i=0;i<j;i++) V[I[k+i]]=k+j-1; | |
71 if(j==1) I[k]=-1; | |
72 }; | |
73 return; | |
74 }; | |
75 | |
76 x=V[I[start+len/2]+h]; | |
77 jj=0;kk=0; | |
78 for(i=start;i<start+len;i++) { | |
79 if(V[I[i]+h]<x) jj++; | |
80 if(V[I[i]+h]==x) kk++; | |
81 }; | |
82 jj+=start;kk+=jj; | |
83 | |
84 i=start;j=0;k=0; | |
85 while(i<jj) { | |
86 if(V[I[i]+h]<x) { | |
87 i++; | |
88 } else if(V[I[i]+h]==x) { | |
89 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; | |
90 j++; | |
91 } else { | |
92 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; | |
93 k++; | |
94 }; | |
95 }; | |
96 | |
97 while(jj+j<kk) { | |
98 if(V[I[jj+j]+h]==x) { | |
99 j++; | |
100 } else { | |
101 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; | |
102 k++; | |
103 }; | |
104 }; | |
105 | |
106 if(jj>start) split(I,V,start,jj-start,h); | |
107 | |
108 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; | |
109 if(jj==kk-1) I[jj]=-1; | |
110 | |
111 if(start+len>kk) split(I,V,kk,start+len-kk,h); | |
112 } | |
113 | |
114 static void | |
115 qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int old
size) | |
116 { | |
117 int buckets[256]; | |
118 int i,h,len; | |
119 | |
120 for(i=0;i<256;i++) buckets[i]=0; | |
121 for(i=0;i<oldsize;i++) buckets[old[i]]++; | |
122 for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; | |
123 for(i=255;i>0;i--) buckets[i]=buckets[i-1]; | |
124 buckets[0]=0; | |
125 | |
126 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; | |
127 I[0]=oldsize; | |
128 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; | |
129 V[oldsize]=0; | |
130 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; | |
131 I[0]=-1; | |
132 | |
133 for(h=1;I[0]!=-(oldsize+1);h+=h) { | |
134 len=0; | |
135 for(i=0;i<oldsize+1;) { | |
136 if(I[i]<0) { | |
137 len-=I[i]; | |
138 i-=I[i]; | |
139 } else { | |
140 if(len) I[i-len]=-len; | |
141 len=V[I[i]]+1-i; | |
142 split(I,V,i,len,h); | |
143 i+=len; | |
144 len=0; | |
145 }; | |
146 }; | |
147 if(len) I[i-len]=-len; | |
148 }; | |
149 | |
150 for(i=0;i<oldsize+1;i++) I[V[i]]=i; | |
151 } | |
152 | |
153 static int | |
154 matchlen(const unsigned char *old,int oldsize,const unsigned char *newbuf,int ne
wsize) | |
155 { | |
156 int i; | |
157 | |
158 for(i=0;(i<oldsize)&&(i<newsize);i++) | |
159 if(old[i]!=newbuf[i]) break; | |
160 | |
161 return i; | |
162 } | |
163 | |
164 static int | |
165 search(PagedArray<int>& I,const unsigned char *old,int oldsize, | |
166 const unsigned char *newbuf,int newsize,int st,int en,int *pos) | |
167 { | |
168 int x,y; | |
169 | |
170 if(en-st<2) { | |
171 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); | |
172 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); | |
173 | |
174 if(x>y) { | |
175 *pos=I[st]; | |
176 return x; | |
177 } else { | |
178 *pos=I[en]; | |
179 return y; | |
180 } | |
181 } | |
182 | |
183 x=st+(en-st)/2; | |
184 if(memcmp(old+I[x],newbuf,std::min(oldsize-I[x],newsize))<0) { | |
185 return search(I,old,oldsize,newbuf,newsize,x,en,pos); | |
186 } else { | |
187 return search(I,old,oldsize,newbuf,newsize,st,x,pos); | |
188 } | |
189 } | |
190 | |
191 // End of 'verbatim' code. | |
192 // ------------------------------------------------------------------------ | |
193 | |
194 static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) { | 44 static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) { |
195 bool ok = stream->Write(header->tag, sizeof(header->tag)); | 45 bool ok = stream->Write(header->tag, sizeof(header->tag)); |
196 ok &= stream->WriteVarint32(header->slen); | 46 ok &= stream->WriteVarint32(header->slen); |
197 ok &= stream->WriteVarint32(header->scrc32); | 47 ok &= stream->WriteVarint32(header->scrc32); |
198 ok &= stream->WriteVarint32(header->dlen); | 48 ok &= stream->WriteVarint32(header->dlen); |
199 return ok; | 49 return ok; |
200 } | 50 } |
201 | 51 |
202 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, | 52 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, |
203 SourceStream* new_stream, | 53 SourceStream* new_stream, |
(...skipping 25 matching lines...) Expand all Loading... |
229 return MEM_ERROR; | 79 return MEM_ERROR; |
230 } | 80 } |
231 | 81 |
232 if (!V.Allocate(oldsize + 1)) { | 82 if (!V.Allocate(oldsize + 1)) { |
233 LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int)) | 83 LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int)) |
234 << " bytes"; | 84 << " bytes"; |
235 return MEM_ERROR; | 85 return MEM_ERROR; |
236 } | 86 } |
237 | 87 |
238 base::Time q_start_time = base::Time::Now(); | 88 base::Time q_start_time = base::Time::Now(); |
239 qsufsort(I, V, old, oldsize); | 89 qsuf::qsufsort<PagedArray<int>&>(I, V, old, oldsize); |
240 VLOG(1) << " done qsufsort " | 90 VLOG(1) << " done qsufsort " |
241 << (base::Time::Now() - q_start_time).InSecondsF(); | 91 << (base::Time::Now() - q_start_time).InSecondsF(); |
242 V.clear(); | 92 V.clear(); |
243 | 93 |
244 const uint8* newbuf = new_stream->Buffer(); | 94 const uint8* newbuf = new_stream->Buffer(); |
245 const int newsize = static_cast<int>(new_stream->Remaining()); | 95 const int newsize = static_cast<int>(new_stream->Remaining()); |
246 | 96 |
247 int control_length = 0; | 97 int control_length = 0; |
248 int diff_bytes_length = 0; | 98 int diff_bytes_length = 0; |
249 int diff_bytes_nonzero = 0; | 99 int diff_bytes_nonzero = 0; |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
293 int scan = 0; | 143 int scan = 0; |
294 int match_length = 0; | 144 int match_length = 0; |
295 | 145 |
296 while (scan < newsize) { | 146 while (scan < newsize) { |
297 int pos = 0; | 147 int pos = 0; |
298 int oldscore = 0; // Count of how many bytes of the current match at |scan| | 148 int oldscore = 0; // Count of how many bytes of the current match at |scan| |
299 // extend the match at |lastscan|. | 149 // extend the match at |lastscan|. |
300 | 150 |
301 scan += match_length; | 151 scan += match_length; |
302 for (int scsc = scan; scan < newsize; ++scan) { | 152 for (int scsc = scan; scan < newsize; ++scan) { |
303 match_length = search(I, old, oldsize, | 153 match_length = qsuf::search<PagedArray<int>&>( |
304 newbuf + scan, newsize - scan, | 154 I, old, oldsize, newbuf + scan, newsize - scan, 0, oldsize, &pos); |
305 0, oldsize, &pos); | |
306 | 155 |
307 for ( ; scsc < scan + match_length ; scsc++) | 156 for ( ; scsc < scan + match_length ; scsc++) |
308 if ((scsc + lastoffset < oldsize) && | 157 if ((scsc + lastoffset < oldsize) && |
309 (old[scsc + lastoffset] == newbuf[scsc])) | 158 (old[scsc + lastoffset] == newbuf[scsc])) |
310 oldscore++; | 159 oldscore++; |
311 | 160 |
312 if ((match_length == oldscore) && (match_length != 0)) | 161 if ((match_length == oldscore) && (match_length != 0)) |
313 break; // Good continuing match, case (1) | 162 break; // Good continuing match, case (1) |
314 if (match_length > oldscore + 8) | 163 if (match_length > oldscore + 8) |
315 break; // New seed match, case (2) | 164 break; // New seed match, case (2) |
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
444 << " (skips: " << diff_skips_length << ")" | 293 << " (skips: " << diff_skips_length << ")" |
445 << " extra bytes: " << extra_bytes_length | 294 << " extra bytes: " << extra_bytes_length |
446 << "\nUncompressed bsdiff patch size " | 295 << "\nUncompressed bsdiff patch size " |
447 << patch_stream->Length() - initial_patch_stream_length | 296 << patch_stream->Length() - initial_patch_stream_length |
448 << "\nEnd bsdiff " | 297 << "\nEnd bsdiff " |
449 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); | 298 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); |
450 | 299 |
451 return OK; | 300 return OK; |
452 } | 301 } |
453 | 302 |
454 } // namespace | 303 } // namespace courgette |
OLD | NEW |