| OLD | NEW |
| 1 /* | 1 /* |
| 2 bsdiff.c -- Binary patch generator. | 2 bsdiff.c -- Binary patch generator. |
| 3 | 3 |
| 4 Copyright 2003 Colin Percival | 4 Copyright 2003 Colin Percival |
| 5 | 5 |
| 6 For the terms under which this work may be distributed, please see | 6 For the terms under which this work may be distributed, please see |
| 7 the adjoining file "LICENSE". | 7 the adjoining file "LICENSE". |
| 8 | 8 |
| 9 ChangeLog: | 9 ChangeLog: |
| 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit | 10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit |
| 11 values throughout. | 11 values throughout. |
| 12 --Benjamin Smedberg <benjamin@smedbergs.us> | 12 --Benjamin Smedberg <benjamin@smedbergs.us> |
| 13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table | 13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table |
| 14 provided by libbz2. | 14 provided by libbz2. |
| 15 --Darin Fisher <darin@meer.net> | 15 --Darin Fisher <darin@meer.net> |
| 16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library | 16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library |
| 17 --Rahul Kuchhal | 17 --Rahul Kuchhal |
| 18 2009-03-31 - Change to use Streams. Added lots of comments. | 18 2009-03-31 - Change to use Streams. Added lots of comments. |
| 19 --Stephen Adams <sra@chromium.org> | 19 --Stephen Adams <sra@chromium.org> |
| 20 2010-05-26 - Use a paged array for V and I. The address space may be too | 20 2010-05-26 - Use a paged array for V and I. The address space may be too |
| 21 fragmented for these big arrays to be contiguous. | 21 fragmented for these big arrays to be contiguous. |
| 22 --Stephen Adams <sra@chromium.org> | 22 --Stephen Adams <sra@chromium.org> |
| 23 2015-08-03 - Extract qsufsort portion to a separate file. |
| 24 --Samuel Huang <huangs@chromium.org> |
| 23 */ | 25 */ |
| 24 | 26 |
| 25 #include "courgette/third_party/bsdiff.h" | 27 #include "courgette/third_party/bsdiff.h" |
| 26 | 28 |
| 27 #include <stdlib.h> | 29 #include <stdlib.h> |
| 28 #include <algorithm> | 30 #include <algorithm> |
| 29 | 31 |
| 30 #include "base/logging.h" | 32 #include "base/logging.h" |
| 31 #include "base/memory/scoped_ptr.h" | 33 #include "base/memory/scoped_ptr.h" |
| 32 #include "base/strings/string_util.h" | 34 #include "base/strings/string_util.h" |
| 33 #include "base/time/time.h" | 35 #include "base/time/time.h" |
| 34 | 36 |
| 35 #include "courgette/crc.h" | 37 #include "courgette/crc.h" |
| 36 #include "courgette/streams.h" | 38 #include "courgette/streams.h" |
| 37 #include "courgette/third_party/paged_array.h" | 39 #include "courgette/third_party/paged_array.h" |
| 40 #include "courgette/third_party/qsufsort.h" |
| 38 | 41 |
| 39 namespace courgette { | 42 namespace courgette { |
| 40 | 43 |
| 41 // ------------------------------------------------------------------------ | |
| 42 // | |
| 43 // The following code is taken verbatim from 'bsdiff.c'. Please keep all the | |
| 44 // code formatting and variable names. The changes from the original are (1) | |
| 45 // replacing tabs with spaces, (2) indentation, (3) using 'const', and (4) | |
| 46 // changing the V and I parameters from int* to PagedArray<int>&. | |
| 47 // | |
| 48 // The code appears to be a rewritten version of the suffix array algorithm | |
| 49 // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko | |
| 50 // Sadakane, special cased for bytes. | |
| 51 | |
| 52 static void | |
| 53 split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h) | |
| 54 { | |
| 55 int i,j,k,x,tmp,jj,kk; | |
| 56 | |
| 57 if(len<16) { | |
| 58 for(k=start;k<start+len;k+=j) { | |
| 59 j=1;x=V[I[k]+h]; | |
| 60 for(i=1;k+i<start+len;i++) { | |
| 61 if(V[I[k+i]+h]<x) { | |
| 62 x=V[I[k+i]+h]; | |
| 63 j=0; | |
| 64 }; | |
| 65 if(V[I[k+i]+h]==x) { | |
| 66 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; | |
| 67 j++; | |
| 68 }; | |
| 69 }; | |
| 70 for(i=0;i<j;i++) V[I[k+i]]=k+j-1; | |
| 71 if(j==1) I[k]=-1; | |
| 72 }; | |
| 73 return; | |
| 74 }; | |
| 75 | |
| 76 x=V[I[start+len/2]+h]; | |
| 77 jj=0;kk=0; | |
| 78 for(i=start;i<start+len;i++) { | |
| 79 if(V[I[i]+h]<x) jj++; | |
| 80 if(V[I[i]+h]==x) kk++; | |
| 81 }; | |
| 82 jj+=start;kk+=jj; | |
| 83 | |
| 84 i=start;j=0;k=0; | |
| 85 while(i<jj) { | |
| 86 if(V[I[i]+h]<x) { | |
| 87 i++; | |
| 88 } else if(V[I[i]+h]==x) { | |
| 89 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; | |
| 90 j++; | |
| 91 } else { | |
| 92 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; | |
| 93 k++; | |
| 94 }; | |
| 95 }; | |
| 96 | |
| 97 while(jj+j<kk) { | |
| 98 if(V[I[jj+j]+h]==x) { | |
| 99 j++; | |
| 100 } else { | |
| 101 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; | |
| 102 k++; | |
| 103 }; | |
| 104 }; | |
| 105 | |
| 106 if(jj>start) split(I,V,start,jj-start,h); | |
| 107 | |
| 108 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; | |
| 109 if(jj==kk-1) I[jj]=-1; | |
| 110 | |
| 111 if(start+len>kk) split(I,V,kk,start+len-kk,h); | |
| 112 } | |
| 113 | |
| 114 static void | |
| 115 qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int old
size) | |
| 116 { | |
| 117 int buckets[256]; | |
| 118 int i,h,len; | |
| 119 | |
| 120 for(i=0;i<256;i++) buckets[i]=0; | |
| 121 for(i=0;i<oldsize;i++) buckets[old[i]]++; | |
| 122 for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; | |
| 123 for(i=255;i>0;i--) buckets[i]=buckets[i-1]; | |
| 124 buckets[0]=0; | |
| 125 | |
| 126 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; | |
| 127 I[0]=oldsize; | |
| 128 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; | |
| 129 V[oldsize]=0; | |
| 130 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; | |
| 131 I[0]=-1; | |
| 132 | |
| 133 for(h=1;I[0]!=-(oldsize+1);h+=h) { | |
| 134 len=0; | |
| 135 for(i=0;i<oldsize+1;) { | |
| 136 if(I[i]<0) { | |
| 137 len-=I[i]; | |
| 138 i-=I[i]; | |
| 139 } else { | |
| 140 if(len) I[i-len]=-len; | |
| 141 len=V[I[i]]+1-i; | |
| 142 split(I,V,i,len,h); | |
| 143 i+=len; | |
| 144 len=0; | |
| 145 }; | |
| 146 }; | |
| 147 if(len) I[i-len]=-len; | |
| 148 }; | |
| 149 | |
| 150 for(i=0;i<oldsize+1;i++) I[V[i]]=i; | |
| 151 } | |
| 152 | |
| 153 static int | |
| 154 matchlen(const unsigned char *old,int oldsize,const unsigned char *newbuf,int ne
wsize) | |
| 155 { | |
| 156 int i; | |
| 157 | |
| 158 for(i=0;(i<oldsize)&&(i<newsize);i++) | |
| 159 if(old[i]!=newbuf[i]) break; | |
| 160 | |
| 161 return i; | |
| 162 } | |
| 163 | |
| 164 static int | |
| 165 search(PagedArray<int>& I,const unsigned char *old,int oldsize, | |
| 166 const unsigned char *newbuf,int newsize,int st,int en,int *pos) | |
| 167 { | |
| 168 int x,y; | |
| 169 | |
| 170 if(en-st<2) { | |
| 171 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); | |
| 172 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); | |
| 173 | |
| 174 if(x>y) { | |
| 175 *pos=I[st]; | |
| 176 return x; | |
| 177 } else { | |
| 178 *pos=I[en]; | |
| 179 return y; | |
| 180 } | |
| 181 } | |
| 182 | |
| 183 x=st+(en-st)/2; | |
| 184 if(memcmp(old+I[x],newbuf,std::min(oldsize-I[x],newsize))<0) { | |
| 185 return search(I,old,oldsize,newbuf,newsize,x,en,pos); | |
| 186 } else { | |
| 187 return search(I,old,oldsize,newbuf,newsize,st,x,pos); | |
| 188 } | |
| 189 } | |
| 190 | |
| 191 // End of 'verbatim' code. | |
| 192 // ------------------------------------------------------------------------ | |
| 193 | |
| 194 static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) { | 44 static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) { |
| 195 bool ok = stream->Write(header->tag, sizeof(header->tag)); | 45 bool ok = stream->Write(header->tag, sizeof(header->tag)); |
| 196 ok &= stream->WriteVarint32(header->slen); | 46 ok &= stream->WriteVarint32(header->slen); |
| 197 ok &= stream->WriteVarint32(header->scrc32); | 47 ok &= stream->WriteVarint32(header->scrc32); |
| 198 ok &= stream->WriteVarint32(header->dlen); | 48 ok &= stream->WriteVarint32(header->dlen); |
| 199 return ok; | 49 return ok; |
| 200 } | 50 } |
| 201 | 51 |
| 202 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, | 52 BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, |
| 203 SourceStream* new_stream, | 53 SourceStream* new_stream, |
| (...skipping 25 matching lines...) Expand all Loading... |
| 229 return MEM_ERROR; | 79 return MEM_ERROR; |
| 230 } | 80 } |
| 231 | 81 |
| 232 if (!V.Allocate(oldsize + 1)) { | 82 if (!V.Allocate(oldsize + 1)) { |
| 233 LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int)) | 83 LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int)) |
| 234 << " bytes"; | 84 << " bytes"; |
| 235 return MEM_ERROR; | 85 return MEM_ERROR; |
| 236 } | 86 } |
| 237 | 87 |
| 238 base::Time q_start_time = base::Time::Now(); | 88 base::Time q_start_time = base::Time::Now(); |
| 239 qsufsort(I, V, old, oldsize); | 89 qsuf::qsufsort<PagedArray<int>&>(I, V, old, oldsize); |
| 240 VLOG(1) << " done qsufsort " | 90 VLOG(1) << " done qsufsort " |
| 241 << (base::Time::Now() - q_start_time).InSecondsF(); | 91 << (base::Time::Now() - q_start_time).InSecondsF(); |
| 242 V.clear(); | 92 V.clear(); |
| 243 | 93 |
| 244 const uint8* newbuf = new_stream->Buffer(); | 94 const uint8* newbuf = new_stream->Buffer(); |
| 245 const int newsize = static_cast<int>(new_stream->Remaining()); | 95 const int newsize = static_cast<int>(new_stream->Remaining()); |
| 246 | 96 |
| 247 int control_length = 0; | 97 int control_length = 0; |
| 248 int diff_bytes_length = 0; | 98 int diff_bytes_length = 0; |
| 249 int diff_bytes_nonzero = 0; | 99 int diff_bytes_nonzero = 0; |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 293 int scan = 0; | 143 int scan = 0; |
| 294 int match_length = 0; | 144 int match_length = 0; |
| 295 | 145 |
| 296 while (scan < newsize) { | 146 while (scan < newsize) { |
| 297 int pos = 0; | 147 int pos = 0; |
| 298 int oldscore = 0; // Count of how many bytes of the current match at |scan| | 148 int oldscore = 0; // Count of how many bytes of the current match at |scan| |
| 299 // extend the match at |lastscan|. | 149 // extend the match at |lastscan|. |
| 300 | 150 |
| 301 scan += match_length; | 151 scan += match_length; |
| 302 for (int scsc = scan; scan < newsize; ++scan) { | 152 for (int scsc = scan; scan < newsize; ++scan) { |
| 303 match_length = search(I, old, oldsize, | 153 match_length = qsuf::search<PagedArray<int>&>( |
| 304 newbuf + scan, newsize - scan, | 154 I, old, oldsize, newbuf + scan, newsize - scan, 0, oldsize, &pos); |
| 305 0, oldsize, &pos); | |
| 306 | 155 |
| 307 for ( ; scsc < scan + match_length ; scsc++) | 156 for ( ; scsc < scan + match_length ; scsc++) |
| 308 if ((scsc + lastoffset < oldsize) && | 157 if ((scsc + lastoffset < oldsize) && |
| 309 (old[scsc + lastoffset] == newbuf[scsc])) | 158 (old[scsc + lastoffset] == newbuf[scsc])) |
| 310 oldscore++; | 159 oldscore++; |
| 311 | 160 |
| 312 if ((match_length == oldscore) && (match_length != 0)) | 161 if ((match_length == oldscore) && (match_length != 0)) |
| 313 break; // Good continuing match, case (1) | 162 break; // Good continuing match, case (1) |
| 314 if (match_length > oldscore + 8) | 163 if (match_length > oldscore + 8) |
| 315 break; // New seed match, case (2) | 164 break; // New seed match, case (2) |
| (...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 444 << " (skips: " << diff_skips_length << ")" | 293 << " (skips: " << diff_skips_length << ")" |
| 445 << " extra bytes: " << extra_bytes_length | 294 << " extra bytes: " << extra_bytes_length |
| 446 << "\nUncompressed bsdiff patch size " | 295 << "\nUncompressed bsdiff patch size " |
| 447 << patch_stream->Length() - initial_patch_stream_length | 296 << patch_stream->Length() - initial_patch_stream_length |
| 448 << "\nEnd bsdiff " | 297 << "\nEnd bsdiff " |
| 449 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); | 298 << (base::Time::Now() - start_bsdiff_time).InSecondsF(); |
| 450 | 299 |
| 451 return OK; | 300 return OK; |
| 452 } | 301 } |
| 453 | 302 |
| 454 } // namespace | 303 } // namespace courgette |
| OLD | NEW |