Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(121)

Side by Side Diff: courgette/third_party/bsdiff_create.cc

Issue 2228003: Use an array of pages for the large arrays.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 10 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « courgette/courgette.gyp ('k') | courgette/third_party/paged_array.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 bsdiff.c -- Binary patch generator. 2 bsdiff.c -- Binary patch generator.
3 3
4 Copyright 2003 Colin Percival 4 Copyright 2003 Colin Percival
5 5
6 For the terms under which this work may be distributed, please see 6 For the terms under which this work may be distributed, please see
7 the adjoining file "LICENSE". 7 the adjoining file "LICENSE".
8 8
9 ChangeLog: 9 ChangeLog:
10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
11 values throughout. 11 values throughout.
12 --Benjamin Smedberg <benjamin@smedbergs.us> 12 --Benjamin Smedberg <benjamin@smedbergs.us>
13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table 13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
14 provided by libbz2. 14 provided by libbz2.
15 --Darin Fisher <darin@meer.net> 15 --Darin Fisher <darin@meer.net>
16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library 16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library
17 --Rahul Kuchhal 17 --Rahul Kuchhal
18 2009-03-31 - Change to use Streams. Added lots of comments. 18 2009-03-31 - Change to use Streams. Added lots of comments.
19 --Stephen Adams <sra@chromium.org> 19 --Stephen Adams <sra@chromium.org>
20 2010-05-26 - Use a paged array for V and I. The address space may be too
21 fragmented for these big arrays to be contiguous.
22 --Stephen Adams <sra@chromium.org>
20 */ 23 */
21 24
22 #include "courgette/third_party/bsdiff.h" 25 #include "courgette/third_party/bsdiff.h"
23 26
24 #include <stdlib.h> 27 #include <stdlib.h>
25 #include <algorithm> 28 #include <algorithm>
26 29
27 #include "base/logging.h" 30 #include "base/logging.h"
28 #include "base/scoped_ptr.h" 31 #include "base/scoped_ptr.h"
29 #include "base/string_util.h" 32 #include "base/string_util.h"
30 #include "base/time.h" 33 #include "base/time.h"
31 34
32 #include "courgette/crc.h" 35 #include "courgette/crc.h"
33 #include "courgette/streams.h" 36 #include "courgette/streams.h"
37 #include "courgette/third_party/paged_array.h"
34 38
35 namespace courgette { 39 namespace courgette {
36 40
37 // ------------------------------------------------------------------------ 41 // ------------------------------------------------------------------------
38 // 42 //
39 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the 43 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the
40 // code formatting and variable names. The only changes from the original are 44 // code formatting and variable names. The changes from the original are (1)
41 // replacing tabs with spaces, indentation, and using 'const'. 45 // replacing tabs with spaces, (2) indentation, (3) using 'const', and (4)
46 // changing the V and I parameters from int* to PagedArray<int>&.
42 // 47 //
43 // The code appears to be a rewritten version of the suffix array algorithm 48 // The code appears to be a rewritten version of the suffix array algorithm
44 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko 49 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko
45 // Sadakane, special cased for bytes. 50 // Sadakane, special cased for bytes.
46 51
47 static void 52 static void
48 split(int *I,int *V,int start,int len,int h) 53 split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h)
49 { 54 {
50 int i,j,k,x,tmp,jj,kk; 55 int i,j,k,x,tmp,jj,kk;
51 56
52 if(len<16) { 57 if(len<16) {
53 for(k=start;k<start+len;k+=j) { 58 for(k=start;k<start+len;k+=j) {
54 j=1;x=V[I[k]+h]; 59 j=1;x=V[I[k]+h];
55 for(i=1;k+i<start+len;i++) { 60 for(i=1;k+i<start+len;i++) {
56 if(V[I[k+i]+h]<x) { 61 if(V[I[k+i]+h]<x) {
57 x=V[I[k+i]+h]; 62 x=V[I[k+i]+h];
58 j=0; 63 j=0;
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
100 105
101 if(jj>start) split(I,V,start,jj-start,h); 106 if(jj>start) split(I,V,start,jj-start,h);
102 107
103 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; 108 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
104 if(jj==kk-1) I[jj]=-1; 109 if(jj==kk-1) I[jj]=-1;
105 110
106 if(start+len>kk) split(I,V,kk,start+len-kk,h); 111 if(start+len>kk) split(I,V,kk,start+len-kk,h);
107 } 112 }
108 113
109 static void 114 static void
110 qsufsort(int *I,int *V,const unsigned char *old,int oldsize) 115 qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int old size)
111 { 116 {
112 int buckets[256]; 117 int buckets[256];
113 int i,h,len; 118 int i,h,len;
114 119
115 for(i=0;i<256;i++) buckets[i]=0; 120 for(i=0;i<256;i++) buckets[i]=0;
116 for(i=0;i<oldsize;i++) buckets[old[i]]++; 121 for(i=0;i<oldsize;i++) buckets[old[i]]++;
117 for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; 122 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
118 for(i=255;i>0;i--) buckets[i]=buckets[i-1]; 123 for(i=255;i>0;i--) buckets[i]=buckets[i-1];
119 buckets[0]=0; 124 buckets[0]=0;
120 125
(...skipping 29 matching lines...) Expand all
150 { 155 {
151 int i; 156 int i;
152 157
153 for(i=0;(i<oldsize)&&(i<newsize);i++) 158 for(i=0;(i<oldsize)&&(i<newsize);i++)
154 if(old[i]!=newbuf[i]) break; 159 if(old[i]!=newbuf[i]) break;
155 160
156 return i; 161 return i;
157 } 162 }
158 163
159 static int 164 static int
160 search(int *I,const unsigned char *old,int oldsize, 165 search(PagedArray<int>& I,const unsigned char *old,int oldsize,
161 const unsigned char *newbuf,int newsize,int st,int en,int *pos) 166 const unsigned char *newbuf,int newsize,int st,int en,int *pos)
162 { 167 {
163 int x,y; 168 int x,y;
164 169
165 if(en-st<2) { 170 if(en-st<2) {
166 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); 171 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
167 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); 172 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
168 173
169 if(x>y) { 174 if(x>y) {
170 *pos=I[st]; 175 *pos=I[st];
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
207 SinkStream* control_stream_seeks = patch_streams.stream(2); 212 SinkStream* control_stream_seeks = patch_streams.stream(2);
208 SinkStream* diff_skips = patch_streams.stream(3); 213 SinkStream* diff_skips = patch_streams.stream(3);
209 SinkStream* diff_bytes = patch_streams.stream(4); 214 SinkStream* diff_bytes = patch_streams.stream(4);
210 SinkStream* extra_bytes = patch_streams.stream(5); 215 SinkStream* extra_bytes = patch_streams.stream(5);
211 216
212 const uint8* old = old_stream->Buffer(); 217 const uint8* old = old_stream->Buffer();
213 const int oldsize = old_stream->Remaining(); 218 const int oldsize = old_stream->Remaining();
214 219
215 uint32 pending_diff_zeros = 0; 220 uint32 pending_diff_zeros = 0;
216 221
217 scoped_array<int> I(new(std::nothrow) int[oldsize + 1]); 222 PagedArray<int> I;
218 scoped_array<int> V(new(std::nothrow) int[oldsize + 1]); 223 PagedArray<int> V;
219 if (I == NULL || V == NULL) 224
225 if (!I.Allocate(oldsize + 1)) {
226 LOG(ERROR) << "Could not allocate I[], " << ((oldsize + 1) * sizeof(int))
227 << " bytes";
220 return MEM_ERROR; 228 return MEM_ERROR;
229 }
230
231 if (!V.Allocate(oldsize + 1)) {
232 LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int))
233 << " bytes";
234 return MEM_ERROR;
235 }
221 236
222 base::Time q_start_time = base::Time::Now(); 237 base::Time q_start_time = base::Time::Now();
223 qsufsort(I.get(), V.get(), old, oldsize); 238 qsufsort(I, V, old, oldsize);
224 LOG(INFO) << " done qsufsort " 239 LOG(INFO) << " done qsufsort "
225 << (base::Time::Now() - q_start_time).InSecondsF(); 240 << (base::Time::Now() - q_start_time).InSecondsF();
226 V.reset(); 241 V.clear();
227 242
228 const uint8* newbuf = new_stream->Buffer(); 243 const uint8* newbuf = new_stream->Buffer();
229 const int newsize = new_stream->Remaining(); 244 const int newsize = new_stream->Remaining();
230 245
231 int control_length = 0; 246 int control_length = 0;
232 int diff_bytes_length = 0; 247 int diff_bytes_length = 0;
233 int diff_bytes_nonzero = 0; 248 int diff_bytes_nonzero = 0;
234 int extra_bytes_length = 0; 249 int extra_bytes_length = 0;
235 250
236 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is 251 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
277 int scan = 0; 292 int scan = 0;
278 int match_length = 0; 293 int match_length = 0;
279 294
280 while (scan < newsize) { 295 while (scan < newsize) {
281 int pos = 0; 296 int pos = 0;
282 int oldscore = 0; // Count of how many bytes of the current match at |scan| 297 int oldscore = 0; // Count of how many bytes of the current match at |scan|
283 // extend the match at |lastscan|. 298 // extend the match at |lastscan|.
284 299
285 scan += match_length; 300 scan += match_length;
286 for (int scsc = scan; scan < newsize; ++scan) { 301 for (int scsc = scan; scan < newsize; ++scan) {
287 match_length = search(I.get(), old, oldsize, 302 match_length = search(I, old, oldsize,
288 newbuf + scan, newsize - scan, 303 newbuf + scan, newsize - scan,
289 0, oldsize, &pos); 304 0, oldsize, &pos);
290 305
291 for ( ; scsc < scan + match_length ; scsc++) 306 for ( ; scsc < scan + match_length ; scsc++)
292 if ((scsc + lastoffset < oldsize) && 307 if ((scsc + lastoffset < oldsize) &&
293 (old[scsc + lastoffset] == newbuf[scsc])) 308 (old[scsc + lastoffset] == newbuf[scsc]))
294 oldscore++; 309 oldscore++;
295 310
296 if ((match_length == oldscore) && (match_length != 0)) 311 if ((match_length == oldscore) && (match_length != 0))
297 break; // Good continuing match, case (1) 312 break; // Good continuing match, case (1)
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
389 #endif 404 #endif
390 405
391 lastscan = scan - lenb; // Include the backward extension in seed. 406 lastscan = scan - lenb; // Include the backward extension in seed.
392 lastpos = pos - lenb; // ditto. 407 lastpos = pos - lenb; // ditto.
393 lastoffset = lastpos - lastscan; 408 lastoffset = lastpos - lastscan;
394 } 409 }
395 } 410 }
396 411
397 diff_skips->WriteVarint32(pending_diff_zeros); 412 diff_skips->WriteVarint32(pending_diff_zeros);
398 413
399 I.reset(); 414 I.clear();
400 415
401 MBSPatchHeader header; 416 MBSPatchHeader header;
402 // The string will have a null terminator that we don't use, hence '-1'. 417 // The string will have a null terminator that we don't use, hence '-1'.
403 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag), 418 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag),
404 MBS_PATCH_HEADER_TAG_must_match_header_field_size); 419 MBS_PATCH_HEADER_TAG_must_match_header_field_size);
405 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag)); 420 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag));
406 header.slen = oldsize; 421 header.slen = oldsize;
407 header.scrc32 = CalculateCrc(old, oldsize); 422 header.scrc32 = CalculateCrc(old, oldsize);
408 header.dlen = newsize; 423 header.dlen = newsize;
409 424
(...skipping 11 matching lines...) Expand all
421 LOG(INFO) << "Uncompressed bsdiff patch size " 436 LOG(INFO) << "Uncompressed bsdiff patch size "
422 << patch_stream->Length() - initial_patch_stream_length; 437 << patch_stream->Length() - initial_patch_stream_length;
423 438
424 LOG(INFO) << "End bsdiff " 439 LOG(INFO) << "End bsdiff "
425 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); 440 << (base::Time::Now() - start_bsdiff_time).InSecondsF();
426 441
427 return OK; 442 return OK;
428 } 443 }
429 444
430 } // namespace 445 } // namespace
OLDNEW
« no previous file with comments | « courgette/courgette.gyp ('k') | courgette/third_party/paged_array.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698