Index: courgette/third_party/bsdiff_create.cc |
diff --git a/courgette/third_party/bsdiff_create.cc b/courgette/third_party/bsdiff_create.cc |
index b43b53a4cd4290e46ee0e9441ada04d83f3cd9ec..d6ea69564ed8d577583ef6e752788767e1f60657 100644 |
--- a/courgette/third_party/bsdiff_create.cc |
+++ b/courgette/third_party/bsdiff_create.cc |
@@ -20,6 +20,8 @@ |
2010-05-26 - Use a paged array for V and I. The address space may be too |
fragmented for these big arrays to be contiguous. |
--Stephen Adams <sra@chromium.org> |
+ 2015-08-03 - Extract qsufsort portion to a separate file. |
+ --Samuel Huang <huangs@chromium.org> |
*/ |
#include "courgette/third_party/bsdiff.h" |
@@ -35,162 +37,10 @@ |
#include "courgette/crc.h" |
#include "courgette/streams.h" |
#include "courgette/third_party/paged_array.h" |
+#include "courgette/third_party/qsufsort.h" |
namespace courgette { |
-// ------------------------------------------------------------------------ |
-// |
-// The following code is taken verbatim from 'bsdiff.c'. Please keep all the |
-// code formatting and variable names. The changes from the original are (1) |
-// replacing tabs with spaces, (2) indentation, (3) using 'const', and (4) |
-// changing the V and I parameters from int* to PagedArray<int>&. |
-// |
-// The code appears to be a rewritten version of the suffix array algorithm |
-// presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko |
-// Sadakane, special cased for bytes. |
- |
-static void |
-split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h) |
-{ |
- int i,j,k,x,tmp,jj,kk; |
- |
- if(len<16) { |
- for(k=start;k<start+len;k+=j) { |
- j=1;x=V[I[k]+h]; |
- for(i=1;k+i<start+len;i++) { |
- if(V[I[k+i]+h]<x) { |
- x=V[I[k+i]+h]; |
- j=0; |
- }; |
- if(V[I[k+i]+h]==x) { |
- tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; |
- j++; |
- }; |
- }; |
- for(i=0;i<j;i++) V[I[k+i]]=k+j-1; |
- if(j==1) I[k]=-1; |
- }; |
- return; |
- }; |
- |
- x=V[I[start+len/2]+h]; |
- jj=0;kk=0; |
- for(i=start;i<start+len;i++) { |
- if(V[I[i]+h]<x) jj++; |
- if(V[I[i]+h]==x) kk++; |
- }; |
- jj+=start;kk+=jj; |
- |
- i=start;j=0;k=0; |
- while(i<jj) { |
- if(V[I[i]+h]<x) { |
- i++; |
- } else if(V[I[i]+h]==x) { |
- tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; |
- j++; |
- } else { |
- tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; |
- k++; |
- }; |
- }; |
- |
- while(jj+j<kk) { |
- if(V[I[jj+j]+h]==x) { |
- j++; |
- } else { |
- tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; |
- k++; |
- }; |
- }; |
- |
- if(jj>start) split(I,V,start,jj-start,h); |
- |
- for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; |
- if(jj==kk-1) I[jj]=-1; |
- |
- if(start+len>kk) split(I,V,kk,start+len-kk,h); |
-} |
- |
-static void |
-qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int oldsize) |
-{ |
- int buckets[256]; |
- int i,h,len; |
- |
- for(i=0;i<256;i++) buckets[i]=0; |
- for(i=0;i<oldsize;i++) buckets[old[i]]++; |
- for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; |
- for(i=255;i>0;i--) buckets[i]=buckets[i-1]; |
- buckets[0]=0; |
- |
- for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; |
- I[0]=oldsize; |
- for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; |
- V[oldsize]=0; |
- for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; |
- I[0]=-1; |
- |
- for(h=1;I[0]!=-(oldsize+1);h+=h) { |
- len=0; |
- for(i=0;i<oldsize+1;) { |
- if(I[i]<0) { |
- len-=I[i]; |
- i-=I[i]; |
- } else { |
- if(len) I[i-len]=-len; |
- len=V[I[i]]+1-i; |
- split(I,V,i,len,h); |
- i+=len; |
- len=0; |
- }; |
- }; |
- if(len) I[i-len]=-len; |
- }; |
- |
- for(i=0;i<oldsize+1;i++) I[V[i]]=i; |
-} |
- |
-static int |
-matchlen(const unsigned char *old,int oldsize,const unsigned char *newbuf,int newsize) |
-{ |
- int i; |
- |
- for(i=0;(i<oldsize)&&(i<newsize);i++) |
- if(old[i]!=newbuf[i]) break; |
- |
- return i; |
-} |
- |
-static int |
-search(PagedArray<int>& I,const unsigned char *old,int oldsize, |
- const unsigned char *newbuf,int newsize,int st,int en,int *pos) |
-{ |
- int x,y; |
- |
- if(en-st<2) { |
- x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); |
- y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); |
- |
- if(x>y) { |
- *pos=I[st]; |
- return x; |
- } else { |
- *pos=I[en]; |
- return y; |
- } |
- } |
- |
- x=st+(en-st)/2; |
- if(memcmp(old+I[x],newbuf,std::min(oldsize-I[x],newsize))<0) { |
- return search(I,old,oldsize,newbuf,newsize,x,en,pos); |
- } else { |
- return search(I,old,oldsize,newbuf,newsize,st,x,pos); |
- } |
-} |
- |
-// End of 'verbatim' code. |
-// ------------------------------------------------------------------------ |
- |
static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) { |
bool ok = stream->Write(header->tag, sizeof(header->tag)); |
ok &= stream->WriteVarint32(header->slen); |
@@ -236,7 +86,7 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, |
} |
base::Time q_start_time = base::Time::Now(); |
- qsufsort(I, V, old, oldsize); |
+ qsuf::qsufsort<PagedArray<int>&>(I, V, old, oldsize); |
VLOG(1) << " done qsufsort " |
<< (base::Time::Now() - q_start_time).InSecondsF(); |
V.clear(); |
@@ -300,9 +150,8 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, |
scan += match_length; |
for (int scsc = scan; scan < newsize; ++scan) { |
- match_length = search(I, old, oldsize, |
- newbuf + scan, newsize - scan, |
- 0, oldsize, &pos); |
+ match_length = qsuf::search<PagedArray<int>&>( |
+ I, old, oldsize, newbuf + scan, newsize - scan, 0, oldsize, &pos); |
for ( ; scsc < scan + match_length ; scsc++) |
if ((scsc + lastoffset < oldsize) && |
@@ -451,4 +300,4 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, |
return OK; |
} |
-} // namespace |
+} // namespace courgette |