courgette/third_party/bsdiff/bsdiff_create.cc - Issue 1961963003: Move //courgette/third_party to subfolder.

Side by Side Diff: courgette/third_party/bsdiff/bsdiff_create.cc

Issue 1961963003: Move //courgette/third_party to subfolder. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Fixed compilation Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« courgette/third_party/bsdiff/bsdiff.h ('K') | « courgette/third_party/bsdiff/bsdiff_apply.cc ('k') | courgette/third_party/bsdiff/paged_array.h » ('j') | courgette/third_party/bsdiff/paged_array.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 bsdiff.c -- Binary patch generator.	2 bsdiff.c -- Binary patch generator.

3	3

4 Copyright 2003 Colin Percival	4 Copyright 2003 Colin Percival

5	5

6 For the terms under which this work may be distributed, please see	6 For the terms under which this work may be distributed, please see

7 the adjoining file "LICENSE".	7 the adjoining file "LICENSE".

8	8

9 ChangeLog:	9 ChangeLog:

10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit	10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit

11 values throughout.	11 values throughout.

12 --Benjamin Smedberg <benjamin@smedbergs.us>	12 --Benjamin Smedberg <benjamin@smedbergs.us>

13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table	13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table

14 provided by libbz2.	14 provided by libbz2.

15 --Darin Fisher <darin@meer.net>	15 --Darin Fisher <darin@meer.net>

16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library	16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library

17 --Rahul Kuchhal	17 --Rahul Kuchhal

18 2009-03-31 - Change to use Streams. Added lots of comments.	18 2009-03-31 - Change to use Streams. Added lots of comments.

19 --Stephen Adams <sra@chromium.org>	19 --Stephen Adams <sra@chromium.org>

20 2010-05-26 - Use a paged array for V and I. The address space may be too	20 2010-05-26 - Use a paged array for V and I. The address space may be too

21 fragmented for these big arrays to be contiguous.	21 fragmented for these big arrays to be contiguous.

22 --Stephen Adams <sra@chromium.org>	22 --Stephen Adams <sra@chromium.org>

23 2015-08-03 - Extract qsufsort portion to a separate file.	23 2015-08-03 - Extract qsufsort portion to a separate file.

24 --Samuel Huang <huangs@chromium.org>	24 --Samuel Huang <huangs@chromium.org>

25 2015-08-12 - Interface change to qsufsort search().	25 2015-08-12 - Interface change to qsufsort search().

26 --Samuel Huang <huangs@chromium.org>	26 --Samuel Huang <huangs@chromium.org>

27 */	27 */

28	28

29 #include "courgette/third_party/bsdiff.h"	29 #include "courgette/third_party/bsdiff/bsdiff.h"

30	30

31 #include <stddef.h>	31 #include <stddef.h>

32 #include <stdint.h>	32 #include <stdint.h>

33 #include <stdlib.h>	33 #include <stdlib.h>

34 #include <algorithm>	34 #include <algorithm>

35	35

36 #include "base/logging.h"	36 #include "base/logging.h"

37 #include "base/strings/string_util.h"	37 #include "base/strings/string_util.h"

38 #include "base/time/time.h"	38 #include "base/time/time.h"

39	39

40 #include "courgette/crc.h"	40 #include "courgette/crc.h"

41 #include "courgette/streams.h"	41 #include "courgette/streams.h"

42 #include "courgette/third_party/paged_array.h"	42 #include "courgette/third_party/bsdiff/paged_array.h"

43 #include "courgette/third_party/qsufsort.h"	43 #include "courgette/third_party/bsdiff/qsufsort.h"

44	44

45 namespace courgette {	45 namespace courgette {

46	46

47 static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) {	47 static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) {

48 bool ok = stream->Write(header->tag, sizeof(header->tag));	48 bool ok = stream->Write(header->tag, sizeof(header->tag));

49 ok &= stream->WriteVarint32(header->slen);	49 ok &= stream->WriteVarint32(header->slen);

50 ok &= stream->WriteVarint32(header->scrc32);	50 ok &= stream->WriteVarint32(header->scrc32);

51 ok &= stream->WriteVarint32(header->dlen);	51 ok &= stream->WriteVarint32(header->dlen);

52 return ok;	52 return ok;

53 }	53 }

54	54

55 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,	55 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,

56 SourceStream* new_stream,	56 SourceStream* new_stream,

57 SinkStream* patch_stream)	57 SinkStream* patch_stream) {

58 {

59 base::Time start_bsdiff_time = base::Time::Now();	58 base::Time start_bsdiff_time = base::Time::Now();

60 VLOG(1) << "Start bsdiff";	59 VLOG(1) << "Start bsdiff";

61 size_t initial_patch_stream_length = patch_stream->Length();	60 size_t initial_patch_stream_length = patch_stream->Length();

62	61

63 SinkStreamSet patch_streams;	62 SinkStreamSet patch_streams;

64 SinkStream* control_stream_copy_counts = patch_streams.stream(0);	63 SinkStream* control_stream_copy_counts = patch_streams.stream(0);

65 SinkStream* control_stream_extra_counts = patch_streams.stream(1);	64 SinkStream* control_stream_extra_counts = patch_streams.stream(1);

66 SinkStream* control_stream_seeks = patch_streams.stream(2);	65 SinkStream* control_stream_seeks = patch_streams.stream(2);

67 SinkStream* diff_skips = patch_streams.stream(3);	66 SinkStream* diff_skips = patch_streams.stream(3);

68 SinkStream* diff_bytes = patch_streams.stream(4);	67 SinkStream* diff_bytes = patch_streams.stream(4);

(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 // ssssssssss Seed at \|lastscan\|	132 // ssssssssss Seed at \|lastscan\|

134 // xxyyyxxyyxy \|scan\| forward, cases (3)(x) & (1)(y)	133 // xxyyyxxyyxy \|scan\| forward, cases (3)(x) & (1)(y)

135 // mmmmmmmm New match will start new seed case (2).	134 // mmmmmmmm New match will start new seed case (2).

136 // fffffffffffffff \|lenf\| = scan forward from \|lastscan\|	135 // fffffffffffffff \|lenf\| = scan forward from \|lastscan\|

137 // bbbb \|lenb\| = scan back from new seed \|scan\|.	136 // bbbb \|lenb\| = scan back from new seed \|scan\|.

138 // ddddddddddddddd Emit diff bytes for the 'copy'.	137 // ddddddddddddddd Emit diff bytes for the 'copy'.

139 // xx Emit extra bytes.	138 // xx Emit extra bytes.

140 // ssssssssssss \|lastscan = scan - lenb\| is new seed.	139 // ssssssssssss \|lastscan = scan - lenb\| is new seed.

141 // x Cases (1) and (3) ....	140 // x Cases (1) and (3) ....

142	141

143

144 int lastscan = 0, lastpos = 0, lastoffset = 0;	142 int lastscan = 0, lastpos = 0, lastoffset = 0;

145	143

146 int scan = 0;	144 int scan = 0;

147 int match_length = 0;	145 int match_length = 0;

148	146

149 while (scan < newsize) {	147 while (scan < newsize) {

150 int pos = 0;	148 int pos = 0;

151 int oldscore = 0; // Count of how many bytes of the current match at \|scan\|	149 int oldscore = 0; // Count of how many bytes of the current match at \|scan\|

152 // extend the match at \|lastscan\|.	150 // extend the match at \|lastscan\|.

153	151

154 scan += match_length;	152 scan += match_length;

155 for (int scsc = scan; scan < newsize; ++scan) {	153 for (int scsc = scan; scan < newsize; ++scan) {

156 match_length = qsuf::search<PagedArray<int>&>(	154 match_length = qsuf::search<PagedArray<int>&>(

157 I, old, oldsize, newbuf + scan, newsize - scan, &pos);	155 I, old, oldsize, newbuf + scan, newsize - scan, &pos);

158	156

159 for ( ; scsc < scan + match_length ; scsc++)	157 for (; scsc < scan + match_length; scsc++)

160 if ((scsc + lastoffset < oldsize) &&	158 if ((scsc + lastoffset < oldsize) &&

161 (old[scsc + lastoffset] == newbuf[scsc]))	159 (old[scsc + lastoffset] == newbuf[scsc]))

162 oldscore++;	160 oldscore++;

163	161

164 if ((match_length == oldscore) && (match_length != 0))	162 if ((match_length == oldscore) && (match_length != 0))

165 break; // Good continuing match, case (1)	163 break; // Good continuing match, case (1)

166 if (match_length > oldscore + 8)	164 if (match_length > oldscore + 8)

167 break; // New seed match, case (2)	165 break; // New seed match, case (2)

168	166

169 if ((scan + lastoffset < oldsize) &&	167 if ((scan + lastoffset < oldsize) &&

170 (old[scan + lastoffset] == newbuf[scan]))	168 (old[scan + lastoffset] == newbuf[scan]))

171 oldscore--;	169 oldscore--;

172 // Case (3) continues in this loop until we fall out of the loop (4).	170 // Case (3) continues in this loop until we fall out of the loop (4).

173 }	171 }

174	172

175 if ((match_length != oldscore) || (scan == newsize)) { // Cases (2) and (4)	173 if ((match_length != oldscore) || (scan == newsize)) { // Cases (2) and (4)

176 // This next chunk of code finds the boundary between the bytes to be	174 // This next chunk of code finds the boundary between the bytes to be

177 // copied as part of the current triple, and the bytes to be copied as	175 // copied as part of the current triple, and the bytes to be copied as

178 // part of the next triple. The \|lastscan\| match is extended forwards as	176 // part of the next triple. The \|lastscan\| match is extended forwards as

179 // far as possible provided doing so does not add too many mistakes. The	177 // far as possible provided doing so does not add too many mistakes. The

180 // \|scan\| match is extended backwards in a similar way.	178 // \|scan\| match is extended backwards in a similar way.

181	179

182 // Extend the current match (if any) backwards. \|lenb\| is the maximal	180 // Extend the current match (if any) backwards. \|lenb\| is the maximal

183 // extension for which less than half the byte positions in the extension	181 // extension for which less than half the byte positions in the extension

184 // are wrong.	182 // are wrong.

185 int lenb = 0;	183 int lenb = 0;

186 if (scan < newsize) { // i.e. not case (4); there is a match to extend.	184 if (scan < newsize) { // i.e. not case (4); there is a match to extend.

187 int score = 0, Sb = 0;	185 int score = 0, Sb = 0;

188 for (int i = 1; (scan >= lastscan + i) && (pos >= i); i++) {	186 for (int i = 1; (scan >= lastscan + i) && (pos >= i); i++) {

189 if (old[pos - i] == newbuf[scan - i]) score++;	187 if (old[pos - i] == newbuf[scan - i])

190 if (score*2 - i > Sb*2 - lenb) { Sb = score; lenb = i; }	188 score++;

	189 if (score * 2 - i > Sb * 2 - lenb) {

	190 Sb = score;

	191 lenb = i;

	192 }

191 }	193 }

192 }	194 }

193	195

194 // Extend the lastscan match forward; \|lenf\| is the maximal extension for	196 // Extend the lastscan match forward; \|lenf\| is the maximal extension for

195 // which less than half of the byte positions in entire lastscan match are	197 // which less than half of the byte positions in entire lastscan match are

196 // wrong. There is a subtle point here: \|lastscan\| points to before the	198 // wrong. There is a subtle point here: \|lastscan\| points to before the

197 // seed match by \|lenb\| bytes from the previous iteration. This is why	199 // seed match by \|lenb\| bytes from the previous iteration. This is why

198 // the loop measures the total number of mistakes in the match, not	200 // the loop measures the total number of mistakes in the match, not

199 // just those from the seed match.	201 // just those from the seed match.

200 int lenf = 0;	202 int lenf = 0;

201 {	203 {

202 int score = 0, Sf = 0;	204 int score = 0, Sf = 0;

203 for (int i = 0; (lastscan + i < scan) && (lastpos + i < oldsize); ) {	205 for (int i = 0; (lastscan + i < scan) && (lastpos + i < oldsize);) {

204 if (old[lastpos + i] == newbuf[lastscan + i]) score++;	206 if (old[lastpos + i] == newbuf[lastscan + i])

	207 score++;

205 i++;	208 i++;

206 if (score*2 - i > Sf*2 - lenf) { Sf = score; lenf = i; }	209 if (score * 2 - i > Sf * 2 - lenf) {

	210 Sf = score;

	211 lenf = i;

	212 }

207 }	213 }

208 }	214 }

209	215

210 // If the extended scans overlap, pick a position in the overlap region	216 // If the extended scans overlap, pick a position in the overlap region

211 // that maximizes the exact matching bytes.	217 // that maximizes the exact matching bytes.

212 if (lastscan + lenf > scan - lenb) {	218 if (lastscan + lenf > scan - lenb) {

213 int overlap = (lastscan + lenf) - (scan - lenb);	219 int overlap = (lastscan + lenf) - (scan - lenb);

214 int score = 0;	220 int score = 0;

215 int Ss = 0, lens = 0;	221 int Ss = 0, lens = 0;

216 for (int i = 0; i < overlap; i++) {	222 for (int i = 0; i < overlap; i++) {

217 if (newbuf[lastscan + lenf - overlap + i] ==	223 if (newbuf[lastscan + lenf - overlap + i] ==

218 old[lastpos + lenf - overlap + i]) score++;	224 old[lastpos + lenf - overlap + i])
	huangs (2016/05/10 18:17:02): { } -- altimin (2016/05/11 17:48:47): Done.
219 if (newbuf[scan - lenb + i] == old[pos - lenb + i]) score--;	225 score++;

220 if (score > Ss) { Ss = score; lens = i + 1; }	226 if (newbuf[scan - lenb + i] == old[pos - lenb + i])

	227 score--;

	228 if (score > Ss) {

	229 Ss = score;

	230 lens = i + 1;

	231 }

221 }	232 }

222	233

223 lenf += lens - overlap;	234 lenf += lens - overlap;

224 lenb -= lens;	235 lenb -= lens;

225 };	236 };

226	237

227 for (int i = 0; i < lenf; i++) {	238 for (int i = 0; i < lenf; i++) {

228 uint8_t diff_byte = newbuf[lastscan + i] - old[lastpos + i];	239 uint8_t diff_byte = newbuf[lastscan + i] - old[lastpos + i];

229 if (diff_byte) {	240 if (diff_byte) {

230 ++diff_bytes_nonzero;	241 ++diff_bytes_nonzero;

231 if (!diff_skips->WriteVarint32(pending_diff_zeros))	242 if (!diff_skips->WriteVarint32(pending_diff_zeros))

232 return MEM_ERROR;	243 return MEM_ERROR;

233 pending_diff_zeros = 0;	244 pending_diff_zeros = 0;

234 if (!diff_bytes->Write(&diff_byte, 1))	245 if (!diff_bytes->Write(&diff_byte, 1))

235 return MEM_ERROR;	246 return MEM_ERROR;

236 } else {	247 } else {

237 ++pending_diff_zeros;	248 ++pending_diff_zeros;

238 }	249 }

239 }	250 }

240 int gap = (scan - lenb) - (lastscan + lenf);	251 int gap = (scan - lenb) - (lastscan + lenf);

241 for (int i = 0; i < gap; i++) {	252 for (int i = 0; i < gap; i++) {

242 if (!extra_bytes->Write(&newbuf[lastscan + lenf + i], 1))	253 if (!extra_bytes->Write(&newbuf[lastscan + lenf + i], 1))

243 return MEM_ERROR;	254 return MEM_ERROR;

244 }	255 }

245	256

246 diff_bytes_length += lenf;	257 diff_bytes_length += lenf;

247 extra_bytes_length += gap;	258 extra_bytes_length += gap;

248	259

249 uint32_t copy_count = lenf;	260 uint32_t copy_count = lenf;

250 uint32_t extra_count = gap;	261 uint32_t extra_count = gap;

251 int32_t seek_adjustment = ((pos - lenb) - (lastpos + lenf));	262 int32_t seek_adjustment = ((pos - lenb) - (lastpos + lenf));

252	263

253 if (!control_stream_copy_counts->WriteVarint32(copy_count) ||	264 if (!control_stream_copy_counts->WriteVarint32(copy_count) ||

254 !control_stream_extra_counts->WriteVarint32(extra_count) ||	265 !control_stream_extra_counts->WriteVarint32(extra_count) ||

255 !control_stream_seeks->WriteVarint32Signed(seek_adjustment)) {	266 !control_stream_seeks->WriteVarint32Signed(seek_adjustment)) {

256 return MEM_ERROR;	267 return MEM_ERROR;

257 }	268 }

258	269

259 ++control_length;	270 ++control_length;

260 #ifdef DEBUG_bsmedberg	271 #ifdef DEBUG_bsmedberg

261 VLOG(1) << StringPrintf("Writing a block: copy: %-8u extra: %-8u seek: "	272 VLOG(1) << StringPrintf(

262 "%+-9d", copy_count, extra_count,	273 "Writing a block: copy: %-8u extra: %-8u seek: "
	huangs (2016/05/10 18:17:02): Unwrap and concatenate. -- altimin (2016/05/11 17:48:47): Done.
263 seek_adjustment);	274 "%+-9d",

	275 copy_count, extra_count, seek_adjustment);

264 #endif	276 #endif

265	277

266 lastscan = scan - lenb; // Include the backward extension in seed.	278 lastscan = scan - lenb; // Include the backward extension in seed.

267 lastpos = pos - lenb; // ditto.	279 lastpos = pos - lenb; // ditto.

268 lastoffset = lastpos - lastscan;	280 lastoffset = lastpos - lastscan;

269 }	281 }

270 }	282 }

271	283

272 if (!diff_skips->WriteVarint32(pending_diff_zeros))	284 if (!diff_skips->WriteVarint32(pending_diff_zeros))

273 return MEM_ERROR;	285 return MEM_ERROR;

274	286

275 I.clear();	287 I.clear();

276	288

277 MBSPatchHeader header;	289 MBSPatchHeader header;

278 // The string will have a null terminator that we don't use, hence '-1'.	290 // The string will have a null terminator that we don't use, hence '-1'.

279 static_assert(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag),	291 static_assert(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag),

280 "MBS_PATCH_HEADER_TAG must match header field size");	292 "MBS_PATCH_HEADER_TAG must match header field size");

281 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag));	293 memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag));

282 header.slen = oldsize;	294 header.slen = oldsize;

283 header.scrc32 = CalculateCrc(old, oldsize);	295 header.scrc32 = CalculateCrc(old, oldsize);

284 header.dlen = newsize;	296 header.dlen = newsize;

285	297

286 if (!WriteHeader(patch_stream, &header))	298 if (!WriteHeader(patch_stream, &header))

287 return MEM_ERROR;	299 return MEM_ERROR;

288	300

289 size_t diff_skips_length = diff_skips->Length();	301 size_t diff_skips_length = diff_skips->Length();

290 if (!patch_streams.CopyTo(patch_stream))	302 if (!patch_streams.CopyTo(patch_stream))

291 return MEM_ERROR;	303 return MEM_ERROR;

292	304

293 VLOG(1) << "Control tuples: " << control_length	305 VLOG(1) << "Control tuples: " << control_length

294 << " copy bytes: " << diff_bytes_length	306 << " copy bytes: " << diff_bytes_length

295 << " mistakes: " << diff_bytes_nonzero	307 << " mistakes: " << diff_bytes_nonzero

296 << " (skips: " << diff_skips_length << ")"	308 << " (skips: " << diff_skips_length << ")"

297 << " extra bytes: " << extra_bytes_length	309 << " extra bytes: " << extra_bytes_length

298 << "\nUncompressed bsdiff patch size "	310 << "\nUncompressed bsdiff patch size "

299 << patch_stream->Length() - initial_patch_stream_length	311 << patch_stream->Length() - initial_patch_stream_length

300 << "\nEnd bsdiff "	312 << "\nEnd bsdiff "

301 << (base::Time::Now() - start_bsdiff_time).InSecondsF();	313 << (base::Time::Now() - start_bsdiff_time).InSecondsF();

302	314

303 return OK;	315 return OK;

304 }	316 }

305	317

306 } // namespace courgette	318 } // namespace courgette

OLD	NEW