| Index: courgette/third_party/bsdiff/bsdiff_search.h
|
| diff --git a/courgette/third_party/bsdiff/bsdiff_search.h b/courgette/third_party/bsdiff/bsdiff_search.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..690e47021b20675c72b2ec89e8789edbf0f7b540
|
| --- /dev/null
|
| +++ b/courgette/third_party/bsdiff/bsdiff_search.h
|
| @@ -0,0 +1,87 @@
|
| +/*
|
| + bsdiff_search.h -- Suffix array search.
|
| +
|
| + Copyright 2003 Colin Percival
|
| +
|
| + For the terms under which this work may be distributed, please see
|
| + the adjoining file "LICENSE".
|
| +
|
| + ChangeLog:
|
| + 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
|
| + values throughout.
|
| + --Benjamin Smedberg <benjamin@smedbergs.us>
|
| + 2015-08-03 - Change search() to template to allow PagedArray usage.
|
| + --Samuel Huang <huangs@chromium.org>
|
| + 2015-08-19 - Optimized search().
|
| + --Samuel Huang <huangs@chromium.org>
|
| + 2016-06-02 - Move matchlen() and search() to a separate file; comment; format.
|
| + --Samuel Huang <huangs@chromium.org>
|
| +*/
|
| +
|
| +#include <algorithm>
|
| +#include <cstring>
|
| +
|
| +namespace bsdiff {
|
| +
|
| +// Similar to ::memcmp(), but returns match length instead.
|
| +inline int matchlen(const unsigned char *srcbuf, const unsigned char *keybuf,
|
| + int size) {
|
| + for (int i = 0; i < size; ++i)
|
| + if (srcbuf[i] != keybuf[i])
|
| + return i;
|
| + return size;
|
| +}
|
| +
|
| +// Finds a suffix in |srcbuf| that has the longest common prefix with |keybuf|,
|
| +// aided by suffix array |I| of |srcbuf|. Returns the length of the match, and
|
| +// writes the position within |srcbuf| to |pos|.
|
| +template <class T>
|
| +int search(T I, const unsigned char *srcbuf, int srcize,
|
| + const unsigned char *keybuf, int keysize, int *pos) {
|
| + // We use binary search here, but the desired position is not the usual
|
| + // position from std::lower_bound()-like binary search.
|
| + // For example, given sorted suffixes of |srcbuf|:
|
| + // SAAA...
|
| + // SNNN...
|
| + // TAAA...
|
| + // Applying std::lower_bound() binary search results in:
|
| + // |keybuf| = "SNBB" => "SNNN...".
|
| + // |keybuf| = "SNZZ" => "TAAA...".
|
| + // But the desired result for both cases is "SNNN...", with "SN" as the
|
| + // longest common prefix. To solve this:
|
| + // Case 1: |keysize| == 0: We'd return 0, although |pos| is arbitrary.
|
| + // Case 2: |srcsize| == 0: We'd return 0, and |pos| = 0.
|
| + // Case 3: |srcsize| > 0 and |keysize| > 0: We'd assert rank-|lo| suffix
|
| + // of |srcbuf| is always strictly lexicographically less than |keybuf|. This
|
| + // holds for |lo| = 0, which corresponds to "". The search terminates when
|
| + // hi == lo + 1. At this point, suffixes at rank |lo| and rank |hi| both
|
| + // may be the desired match, so we check both. In case of tie, there's NO
|
| + // guarantee that we take the solution with minimum rank.
|
| + // Finding the minimum rank solution is not needed, and it would create more
|
| + // complexity. For example, with suffixes "MA...", "MB...", "MC...", "ME...",
|
| + // searching look for "MD" would yield |lo| -> "MC...", and the minimum rank
|
| + // solution "MA...", would be rather far away.
|
| + //
|
| + // TODO(huangs): Optimize this: match length at |t| is >= match length at
|
| + // |lo| and at |hi|. If we track this then we can skip comparisons!
|
| + int lo = 0;
|
| + int hi = srcize;
|
| + while (hi - lo > 1) {
|
| + int t = (lo + hi) / 2;
|
| + if (::memcmp(srcbuf + I[t], keybuf, std::min(srcize - I[t], keysize)) < 0)
|
| + lo = t;
|
| + else
|
| + hi = t;
|
| + }
|
| +
|
| + int x = matchlen(srcbuf + I[lo], keybuf, std::min(srcize - I[lo], keysize));
|
| + int y = matchlen(srcbuf + I[hi], keybuf, std::min(srcize - I[hi], keysize));
|
| + if (x > y) {
|
| + *pos = I[lo];
|
| + return x;
|
| + }
|
| + *pos = I[hi];
|
| + return y;
|
| +}
|
| +
|
| +} // namespace bsdiff
|
|
|