courgette/third_party/bsdiff_create.cc - Issue 1272453003: [Courgette] Extract qsufsort module and add tests.

Side by Side Diff: courgette/third_party/bsdiff_create.cc

Issue 1272453003: [Courgette] Extract qsufsort module and add tests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Fix comment re. checklicenses.py UNKNOWN usage. Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 bsdiff.c -- Binary patch generator.	2 bsdiff.c -- Binary patch generator.

3	3

4 Copyright 2003 Colin Percival	4 Copyright 2003 Colin Percival

5	5

6 For the terms under which this work may be distributed, please see	6 For the terms under which this work may be distributed, please see

7 the adjoining file "LICENSE".	7 the adjoining file "LICENSE".

8	8

9 ChangeLog:	9 ChangeLog:

10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit	10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit

11 values throughout.	11 values throughout.

12 --Benjamin Smedberg <benjamin@smedbergs.us>	12 --Benjamin Smedberg <benjamin@smedbergs.us>

13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table	13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table

14 provided by libbz2.	14 provided by libbz2.

15 --Darin Fisher <darin@meer.net>	15 --Darin Fisher <darin@meer.net>

16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library	16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library

17 --Rahul Kuchhal	17 --Rahul Kuchhal

18 2009-03-31 - Change to use Streams. Added lots of comments.	18 2009-03-31 - Change to use Streams. Added lots of comments.

19 --Stephen Adams <sra@chromium.org>	19 --Stephen Adams <sra@chromium.org>

20 2010-05-26 - Use a paged array for V and I. The address space may be too	20 2010-05-26 - Use a paged array for V and I. The address space may be too

21 fragmented for these big arrays to be contiguous.	21 fragmented for these big arrays to be contiguous.

22 --Stephen Adams <sra@chromium.org>	22 --Stephen Adams <sra@chromium.org>

	23 2015-08-03 - Extract qsufsort portion to a separate file.

	24 --Samuel Huang <huangs@chromium.org>

23 */	25 */

24	26

25 #include "courgette/third_party/bsdiff.h"	27 #include "courgette/third_party/bsdiff.h"

26	28

27 #include <stdlib.h>	29 #include <stdlib.h>

28 #include <algorithm>	30 #include <algorithm>

29	31

30 #include "base/logging.h"	32 #include "base/logging.h"

31 #include "base/memory/scoped_ptr.h"	33 #include "base/memory/scoped_ptr.h"

32 #include "base/strings/string_util.h"	34 #include "base/strings/string_util.h"

33 #include "base/time/time.h"	35 #include "base/time/time.h"

34	36

35 #include "courgette/crc.h"	37 #include "courgette/crc.h"

36 #include "courgette/streams.h"	38 #include "courgette/streams.h"

37 #include "courgette/third_party/paged_array.h"	39 #include "courgette/third_party/paged_array.h"

	40 #include "courgette/third_party/qsufsort.h"

38	41

39 namespace courgette {	42 namespace courgette {

40	43

41 // ------------------------------------------------------------------------

42 //

43 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the

44 // code formatting and variable names. The changes from the original are (1)

45 // replacing tabs with spaces, (2) indentation, (3) using 'const', and (4)

46 // changing the V and I parameters from int* to PagedArray<int>&.

47 //

48 // The code appears to be a rewritten version of the suffix array algorithm

49 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko

50 // Sadakane, special cased for bytes.

51

52 static void

53 split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h)

54 {

55 int i,j,k,x,tmp,jj,kk;

56

57 if(len<16) {

58 for(k=start;k<start+len;k+=j) {

59 j=1;x=V[I[k]+h];

60 for(i=1;k+i<start+len;i++) {

61 if(V[I[k+i]+h]<x) {

62 x=V[I[k+i]+h];

63 j=0;

64 };

65 if(V[I[k+i]+h]==x) {

66 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;

67 j++;

68 };

69 };

70 for(i=0;i<j;i++) V[I[k+i]]=k+j-1;

71 if(j==1) I[k]=-1;

72 };

73 return;

74 };

75

76 x=V[I[start+len/2]+h];

77 jj=0;kk=0;

78 for(i=start;i<start+len;i++) {

79 if(V[I[i]+h]<x) jj++;

80 if(V[I[i]+h]==x) kk++;

81 };

82 jj+=start;kk+=jj;

83

84 i=start;j=0;k=0;

85 while(i<jj) {

86 if(V[I[i]+h]<x) {

87 i++;

88 } else if(V[I[i]+h]==x) {

89 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;

90 j++;

91 } else {

92 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;

93 k++;

94 };

95 };

96

97 while(jj+j<kk) {

98 if(V[I[jj+j]+h]==x) {

99 j++;

100 } else {

101 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;

102 k++;

103 };

104 };

105

106 if(jj>start) split(I,V,start,jj-start,h);

107

108 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;

109 if(jj==kk-1) I[jj]=-1;

110

111 if(start+len>kk) split(I,V,kk,start+len-kk,h);

112 }

113

114 static void

115 qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int old size)

116 {

117 int buckets[256];

118 int i,h,len;

119

120 for(i=0;i<256;i++) buckets[i]=0;

121 for(i=0;i<oldsize;i++) buckets[old[i]]++;

122 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];

123 for(i=255;i>0;i--) buckets[i]=buckets[i-1];

124 buckets[0]=0;

125

126 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;

127 I[0]=oldsize;

128 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];

129 V[oldsize]=0;

130 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;

131 I[0]=-1;

132

133 for(h=1;I[0]!=-(oldsize+1);h+=h) {

134 len=0;

135 for(i=0;i<oldsize+1;) {

136 if(I[i]<0) {

137 len-=I[i];

138 i-=I[i];

139 } else {

140 if(len) I[i-len]=-len;

141 len=V[I[i]]+1-i;

142 split(I,V,i,len,h);

143 i+=len;

144 len=0;

145 };

146 };

147 if(len) I[i-len]=-len;

148 };

149

150 for(i=0;i<oldsize+1;i++) I[V[i]]=i;

151 }

152

153 static int

154 matchlen(const unsigned char old,int oldsize,const unsigned char newbuf,int ne wsize)

155 {

156 int i;

157

158 for(i=0;(i<oldsize)&&(i<newsize);i++)

159 if(old[i]!=newbuf[i]) break;

160

161 return i;

162 }

163

164 static int

165 search(PagedArray<int>& I,const unsigned char *old,int oldsize,

166 const unsigned char newbuf,int newsize,int st,int en,int pos)

167 {

168 int x,y;

169

170 if(en-st<2) {

171 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);

172 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);

173

174 if(x>y) {

175 *pos=I[st];

176 return x;

177 } else {

178 *pos=I[en];

179 return y;

180 }

181 }

182

183 x=st+(en-st)/2;

184 if(memcmp(old+I[x],newbuf,std::min(oldsize-I[x],newsize))<0) {

185 return search(I,old,oldsize,newbuf,newsize,x,en,pos);

186 } else {

187 return search(I,old,oldsize,newbuf,newsize,st,x,pos);

188 }

189 }

190

191 // End of 'verbatim' code.

192 // ------------------------------------------------------------------------

193

194 static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) {	44 static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) {

195 bool ok = stream->Write(header->tag, sizeof(header->tag));	45 bool ok = stream->Write(header->tag, sizeof(header->tag));

196 ok &= stream->WriteVarint32(header->slen);	46 ok &= stream->WriteVarint32(header->slen);

197 ok &= stream->WriteVarint32(header->scrc32);	47 ok &= stream->WriteVarint32(header->scrc32);

198 ok &= stream->WriteVarint32(header->dlen);	48 ok &= stream->WriteVarint32(header->dlen);

199 return ok;	49 return ok;

200 }	50 }

201	51

202 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,	52 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,

203 SourceStream* new_stream,	53 SourceStream* new_stream,

(...skipping 25 matching lines...) Expand all Loading...
229 return MEM_ERROR;	79 return MEM_ERROR;

230 }	80 }

231	81

232 if (!V.Allocate(oldsize + 1)) {	82 if (!V.Allocate(oldsize + 1)) {

233 LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int))	83 LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int))

234 << " bytes";	84 << " bytes";

235 return MEM_ERROR;	85 return MEM_ERROR;

236 }	86 }

237	87

238 base::Time q_start_time = base::Time::Now();	88 base::Time q_start_time = base::Time::Now();

239 qsufsort(I, V, old, oldsize);	89 qsuf::qsufsort<PagedArray<int>&>(I, V, old, oldsize);

240 VLOG(1) << " done qsufsort "	90 VLOG(1) << " done qsufsort "

241 << (base::Time::Now() - q_start_time).InSecondsF();	91 << (base::Time::Now() - q_start_time).InSecondsF();

242 V.clear();	92 V.clear();

243	93

244 const uint8* newbuf = new_stream->Buffer();	94 const uint8* newbuf = new_stream->Buffer();

245 const int newsize = static_cast<int>(new_stream->Remaining());	95 const int newsize = static_cast<int>(new_stream->Remaining());

246	96

247 int control_length = 0;	97 int control_length = 0;

248 int diff_bytes_length = 0;	98 int diff_bytes_length = 0;

249 int diff_bytes_nonzero = 0;	99 int diff_bytes_nonzero = 0;

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
293 int scan = 0;	143 int scan = 0;

294 int match_length = 0;	144 int match_length = 0;

295	145

296 while (scan < newsize) {	146 while (scan < newsize) {

297 int pos = 0;	147 int pos = 0;

298 int oldscore = 0; // Count of how many bytes of the current match at \|scan\|	148 int oldscore = 0; // Count of how many bytes of the current match at \|scan\|

299 // extend the match at \|lastscan\|.	149 // extend the match at \|lastscan\|.

300	150

301 scan += match_length;	151 scan += match_length;

302 for (int scsc = scan; scan < newsize; ++scan) {	152 for (int scsc = scan; scan < newsize; ++scan) {

303 match_length = search(I, old, oldsize,	153 match_length = qsuf::search<PagedArray<int>&>(

304 newbuf + scan, newsize - scan,	154 I, old, oldsize, newbuf + scan, newsize - scan, 0, oldsize, &pos);

305 0, oldsize, &pos);

306	155

307 for ( ; scsc < scan + match_length ; scsc++)	156 for ( ; scsc < scan + match_length ; scsc++)

308 if ((scsc + lastoffset < oldsize) &&	157 if ((scsc + lastoffset < oldsize) &&

309 (old[scsc + lastoffset] == newbuf[scsc]))	158 (old[scsc + lastoffset] == newbuf[scsc]))

310 oldscore++;	159 oldscore++;

311	160

312 if ((match_length == oldscore) && (match_length != 0))	161 if ((match_length == oldscore) && (match_length != 0))

313 break; // Good continuing match, case (1)	162 break; // Good continuing match, case (1)

314 if (match_length > oldscore + 8)	163 if (match_length > oldscore + 8)

315 break; // New seed match, case (2)	164 break; // New seed match, case (2)

(...skipping 128 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
444 << " (skips: " << diff_skips_length << ")"	293 << " (skips: " << diff_skips_length << ")"

445 << " extra bytes: " << extra_bytes_length	294 << " extra bytes: " << extra_bytes_length

446 << "\nUncompressed bsdiff patch size "	295 << "\nUncompressed bsdiff patch size "

447 << patch_stream->Length() - initial_patch_stream_length	296 << patch_stream->Length() - initial_patch_stream_length

448 << "\nEnd bsdiff "	297 << "\nEnd bsdiff "

449 << (base::Time::Now() - start_bsdiff_time).InSecondsF();	298 << (base::Time::Now() - start_bsdiff_time).InSecondsF();

450	299

451 return OK;	300 return OK;

452 }	301 }

453	302

454 } // namespace	303 } // namespace courgette

OLD	NEW

« no previous file with comments | « courgette/third_party/README.chromium ('k') | courgette/third_party/qsufsort.h » ('j') | courgette/third_party/qsufsort_unittest.cc » ('J')