courgette/third_party/bsdiff_create.cc - Issue 2228003: Use an array of pages for the large arrays....

Side by Side Diff: courgette/third_party/bsdiff_create.cc

Issue 2228003: Use an array of pages for the large arrays.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 10 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 bsdiff.c -- Binary patch generator.	2 bsdiff.c -- Binary patch generator.

3	3

4 Copyright 2003 Colin Percival	4 Copyright 2003 Colin Percival

5	5

6 For the terms under which this work may be distributed, please see	6 For the terms under which this work may be distributed, please see

7 the adjoining file "LICENSE".	7 the adjoining file "LICENSE".

8	8

9 ChangeLog:	9 ChangeLog:

10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit	10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit

11 values throughout.	11 values throughout.

12 --Benjamin Smedberg <benjamin@smedbergs.us>	12 --Benjamin Smedberg <benjamin@smedbergs.us>

13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table	13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table

14 provided by libbz2.	14 provided by libbz2.

15 --Darin Fisher <darin@meer.net>	15 --Darin Fisher <darin@meer.net>

16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library	16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library

17 --Rahul Kuchhal	17 --Rahul Kuchhal

18 2009-03-31 - Change to use Streams. Added lots of comments.	18 2009-03-31 - Change to use Streams. Added lots of comments.

19 --Stephen Adams <sra@chromium.org>	19 --Stephen Adams <sra@chromium.org>

	20 2010-05-26 - Use a paged array for V and I. The address space may be too

	21 fragmented for these big arrays to be contiguous.

	22 --Stephen Adams <sra@chromium.org>

20 */	23 */

21	24

22 #include "courgette/third_party/bsdiff.h"	25 #include "courgette/third_party/bsdiff.h"

23	26

24 #include <stdlib.h>	27 #include <stdlib.h>

25 #include <algorithm>	28 #include <algorithm>

26	29

27 #include "base/logging.h"	30 #include "base/logging.h"

28 #include "base/scoped_ptr.h"	31 #include "base/scoped_ptr.h"

29 #include "base/string_util.h"	32 #include "base/string_util.h"

30 #include "base/time.h"	33 #include "base/time.h"

31	34

32 #include "courgette/crc.h"	35 #include "courgette/crc.h"

33 #include "courgette/streams.h"	36 #include "courgette/streams.h"

	37 #include "courgette/third_party/paged_array.h"

34	38

35 namespace courgette {	39 namespace courgette {

36	40

37 // ------------------------------------------------------------------------	41 // ------------------------------------------------------------------------

38 //	42 //

39 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the	43 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the

40 // code formatting and variable names. The only changes from the original are	44 // code formatting and variable names. The changes from the original are (1)

41 // replacing tabs with spaces, indentation, and using 'const'.	45 // replacing tabs with spaces, (2) indentation, (3) using 'const', and (4)

	46 // changing the V and I parameters from int* to PagedArray<int>&.

42 //	47 //

43 // The code appears to be a rewritten version of the suffix array algorithm	48 // The code appears to be a rewritten version of the suffix array algorithm

44 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko	49 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko

45 // Sadakane, special cased for bytes.	50 // Sadakane, special cased for bytes.

46	51

47 static void	52 static void

48 split(int I,int V,int start,int len,int h)	53 split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h)

49 {	54 {

50 int i,j,k,x,tmp,jj,kk;	55 int i,j,k,x,tmp,jj,kk;

51	56

52 if(len<16) {	57 if(len<16) {

53 for(k=start;k<start+len;k+=j) {	58 for(k=start;k<start+len;k+=j) {

54 j=1;x=V[I[k]+h];	59 j=1;x=V[I[k]+h];

55 for(i=1;k+i<start+len;i++) {	60 for(i=1;k+i<start+len;i++) {

56 if(V[I[k+i]+h]<x) {	61 if(V[I[k+i]+h]<x) {

57 x=V[I[k+i]+h];	62 x=V[I[k+i]+h];

58 j=0;	63 j=0;

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
100	105

101 if(jj>start) split(I,V,start,jj-start,h);	106 if(jj>start) split(I,V,start,jj-start,h);

102	107

103 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;	108 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;

104 if(jj==kk-1) I[jj]=-1;	109 if(jj==kk-1) I[jj]=-1;

105	110

106 if(start+len>kk) split(I,V,kk,start+len-kk,h);	111 if(start+len>kk) split(I,V,kk,start+len-kk,h);

107 }	112 }

108	113

109 static void	114 static void

110 qsufsort(int I,int V,const unsigned char *old,int oldsize)	115 qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int old size)

111 {	116 {

112 int buckets[256];	117 int buckets[256];

113 int i,h,len;	118 int i,h,len;

114	119

115 for(i=0;i<256;i++) buckets[i]=0;	120 for(i=0;i<256;i++) buckets[i]=0;

116 for(i=0;i<oldsize;i++) buckets[old[i]]++;	121 for(i=0;i<oldsize;i++) buckets[old[i]]++;

117 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];	122 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];

118 for(i=255;i>0;i--) buckets[i]=buckets[i-1];	123 for(i=255;i>0;i--) buckets[i]=buckets[i-1];

119 buckets[0]=0;	124 buckets[0]=0;

120	125

(...skipping 29 matching lines...) Expand all Loading...
150 {	155 {

151 int i;	156 int i;

152	157

153 for(i=0;(i<oldsize)&&(i<newsize);i++)	158 for(i=0;(i<oldsize)&&(i<newsize);i++)

154 if(old[i]!=newbuf[i]) break;	159 if(old[i]!=newbuf[i]) break;

155	160

156 return i;	161 return i;

157 }	162 }

158	163

159 static int	164 static int

160 search(int I,const unsigned char old,int oldsize,	165 search(PagedArray<int>& I,const unsigned char *old,int oldsize,

161 const unsigned char newbuf,int newsize,int st,int en,int pos)	166 const unsigned char newbuf,int newsize,int st,int en,int pos)

162 {	167 {

163 int x,y;	168 int x,y;

164	169

165 if(en-st<2) {	170 if(en-st<2) {

166 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);	171 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);

167 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);	172 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);

168	173

169 if(x>y) {	174 if(x>y) {

170 *pos=I[st];	175 *pos=I[st];

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
207 SinkStream* control_stream_seeks = patch_streams.stream(2);	212 SinkStream* control_stream_seeks = patch_streams.stream(2);

208 SinkStream* diff_skips = patch_streams.stream(3);	213 SinkStream* diff_skips = patch_streams.stream(3);

209 SinkStream* diff_bytes = patch_streams.stream(4);	214 SinkStream* diff_bytes = patch_streams.stream(4);

210 SinkStream* extra_bytes = patch_streams.stream(5);	215 SinkStream* extra_bytes = patch_streams.stream(5);

211	216

212 const uint8* old = old_stream->Buffer();	217 const uint8* old = old_stream->Buffer();

213 const int oldsize = old_stream->Remaining();	218 const int oldsize = old_stream->Remaining();

214	219

215 uint32 pending_diff_zeros = 0;	220 uint32 pending_diff_zeros = 0;

216	221

217 scoped_array<int> I(new(std::nothrow) int[oldsize + 1]);	222 PagedArray<int> I;

218 scoped_array<int> V(new(std::nothrow) int[oldsize + 1]);	223 PagedArray<int> V;

219 if (I == NULL \|\| V == NULL)	224

	225 if (!I.Allocate(oldsize + 1)) {

	226 LOG(ERROR) << "Could not allocate I[], " << ((oldsize + 1) * sizeof(int))

	227 << " bytes";

220 return MEM_ERROR;	228 return MEM_ERROR;

	229 }

	230

	231 if (!V.Allocate(oldsize + 1)) {

	232 LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int))

	233 << " bytes";

	234 return MEM_ERROR;

	235 }

221	236

222 base::Time q_start_time = base::Time::Now();	237 base::Time q_start_time = base::Time::Now();

223 qsufsort(I.get(), V.get(), old, oldsize);	238 qsufsort(I, V, old, oldsize);

224 LOG(INFO) << " done qsufsort "	239 LOG(INFO) << " done qsufsort "

225 << (base::Time::Now() - q_start_time).InSecondsF();	240 << (base::Time::Now() - q_start_time).InSecondsF();

226 V.reset();	241 V.clear();

227	242

228 const uint8* newbuf = new_stream->Buffer();	243 const uint8* newbuf = new_stream->Buffer();

229 const int newsize = new_stream->Remaining();	244 const int newsize = new_stream->Remaining();

230	245

231 int control_length = 0;	246 int control_length = 0;

232 int diff_bytes_length = 0;	247 int diff_bytes_length = 0;

233 int diff_bytes_nonzero = 0;	248 int diff_bytes_nonzero = 0;

234 int extra_bytes_length = 0;	249 int extra_bytes_length = 0;

235	250

236 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is	251 // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
277 int scan = 0;	292 int scan = 0;

278 int match_length = 0;	293 int match_length = 0;

279	294

280 while (scan < newsize) {	295 while (scan < newsize) {

281 int pos = 0;	296 int pos = 0;

282 int oldscore = 0; // Count of how many bytes of the current match at \|scan\|	297 int oldscore = 0; // Count of how many bytes of the current match at \|scan\|

283 // extend the match at \|lastscan\|.	298 // extend the match at \|lastscan\|.

284	299

285 scan += match_length;	300 scan += match_length;

286 for (int scsc = scan; scan < newsize; ++scan) {	301 for (int scsc = scan; scan < newsize; ++scan) {

287 match_length = search(I.get(), old, oldsize,	302 match_length = search(I, old, oldsize,

288 newbuf + scan, newsize - scan,	303 newbuf + scan, newsize - scan,

289 0, oldsize, &pos);	304 0, oldsize, &pos);

290	305

291 for ( ; scsc < scan + match_length ; scsc++)	306 for ( ; scsc < scan + match_length ; scsc++)

292 if ((scsc + lastoffset < oldsize) &&	307 if ((scsc + lastoffset < oldsize) &&

293 (old[scsc + lastoffset] == newbuf[scsc]))	308 (old[scsc + lastoffset] == newbuf[scsc]))

294 oldscore++;	309 oldscore++;

295	310

296 if ((match_length == oldscore) && (match_length != 0))	311 if ((match_length == oldscore) && (match_length != 0))

297 break; // Good continuing match, case (1)	312 break; // Good continuing match, case (1)

(...skipping 91 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
389 #endif	404 #endif

390	405

391 lastscan = scan - lenb; // Include the backward extension in seed.	406 lastscan = scan - lenb; // Include the backward extension in seed.

392 lastpos = pos - lenb; // ditto.	407 lastpos = pos - lenb; // ditto.

393 lastoffset = lastpos - lastscan;	408 lastoffset = lastpos - lastscan;

394 }	409 }

395 }	410 }

396	411

397 diff_skips->WriteVarint32(pending_diff_zeros);	412 diff_skips->WriteVarint32(pending_diff_zeros);

398	413

399 I.reset();	414 I.clear();

400	415

401 MBSPatchHeader header;	416 MBSPatchHeader header;

402 // The string will have a null terminator that we don't use, hence '-1'.	417 // The string will have a null terminator that we don't use, hence '-1'.

403 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag),	418 COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag),

404 MBS_PATCH_HEADER_TAG_must_match_header_field_size);	419 MBS_PATCH_HEADER_TAG_must_match_header_field_size);

405 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag));	420 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag));

406 header.slen = oldsize;	421 header.slen = oldsize;

407 header.scrc32 = CalculateCrc(old, oldsize);	422 header.scrc32 = CalculateCrc(old, oldsize);

408 header.dlen = newsize;	423 header.dlen = newsize;

409	424

(...skipping 11 matching lines...) Expand all Loading...
421 LOG(INFO) << "Uncompressed bsdiff patch size "	436 LOG(INFO) << "Uncompressed bsdiff patch size "

422 << patch_stream->Length() - initial_patch_stream_length;	437 << patch_stream->Length() - initial_patch_stream_length;

423	438

424 LOG(INFO) << "End bsdiff "	439 LOG(INFO) << "End bsdiff "

425 << (base::Time::Now() - start_bsdiff_time).InSecondsF();	440 << (base::Time::Now() - start_bsdiff_time).InSecondsF();

426	441

427 return OK;	442 return OK;

428 }	443 }

429	444

430 } // namespace	445 } // namespace

OLD	NEW

« no previous file with comments | « courgette/courgette.gyp ('k') | courgette/third_party/paged_array.h » ('j') | no next file with comments »