Index: courgette/third_party/bsdiff/bsdiff_search.h |
diff --git a/courgette/third_party/bsdiff/bsdiff_search.h b/courgette/third_party/bsdiff/bsdiff_search.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..690e47021b20675c72b2ec89e8789edbf0f7b540 |
--- /dev/null |
+++ b/courgette/third_party/bsdiff/bsdiff_search.h |
@@ -0,0 +1,87 @@ |
+/* |
+ bsdiff_search.h -- Suffix array search. |
+ |
+ Copyright 2003 Colin Percival |
+ |
+ For the terms under which this work may be distributed, please see |
+ the adjoining file "LICENSE". |
+ |
+ ChangeLog: |
+ 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit |
+ values throughout. |
+ --Benjamin Smedberg <benjamin@smedbergs.us> |
+ 2015-08-03 - Change search() to template to allow PagedArray usage. |
+ --Samuel Huang <huangs@chromium.org> |
+ 2015-08-19 - Optimized search(). |
+ --Samuel Huang <huangs@chromium.org> |
+ 2016-06-02 - Move matchlen() and search() to a separate file; comment; format. |
+ --Samuel Huang <huangs@chromium.org> |
+*/ |
+ |
+#include <algorithm> |
+#include <cstring> |
+ |
+namespace bsdiff { |
+ |
+// Similar to ::memcmp(), but returns match length instead. |
+inline int matchlen(const unsigned char *srcbuf, const unsigned char *keybuf, |
+ int size) { |
+ for (int i = 0; i < size; ++i) |
+ if (srcbuf[i] != keybuf[i]) |
+ return i; |
+ return size; |
+} |
+ |
+// Finds a suffix in |srcbuf| that has the longest common prefix with |keybuf|, |
+// aided by suffix array |I| of |srcbuf|. Returns the length of the match, and |
+// writes the position within |srcbuf| to |pos|. |
+template <class T> |
+int search(T I, const unsigned char *srcbuf, int srcize, |
+ const unsigned char *keybuf, int keysize, int *pos) { |
+ // We use binary search here, but the desired position is not the usual |
+ // position from std::lower_bound()-like binary search. |
+ // For example, given sorted suffixes of |srcbuf|: |
+ // SAAA... |
+ // SNNN... |
+ // TAAA... |
+ // Applying std::lower_bound() binary search results in: |
+ // |keybuf| = "SNBB" => "SNNN...". |
+ // |keybuf| = "SNZZ" => "TAAA...". |
+ // But the desired result for both cases is "SNNN...", with "SN" as the |
+ // longest common prefix. To solve this: |
+ // Case 1: |keysize| == 0: We'd return 0, although |pos| is arbitrary. |
+ // Case 2: |srcsize| == 0: We'd return 0, and |pos| = 0. |
+ // Case 3: |srcsize| > 0 and |keysize| > 0: We'd assert rank-|lo| suffix |
+ // of |srcbuf| is always strictly lexicographically less than |keybuf|. This |
+ // holds for |lo| = 0, which corresponds to "". The search terminates when |
+ // hi == lo + 1. At this point, suffixes at rank |lo| and rank |hi| both |
+ // may be the desired match, so we check both. In case of tie, there's NO |
+ // guarantee that we take the solution with minimum rank. |
+ // Finding the minimum rank solution is not needed, and it would create more |
+ // complexity. For example, with suffixes "MA...", "MB...", "MC...", "ME...", |
+ // searching look for "MD" would yield |lo| -> "MC...", and the minimum rank |
+ // solution "MA...", would be rather far away. |
+ // |
+ // TODO(huangs): Optimize this: match length at |t| is >= match length at |
+ // |lo| and at |hi|. If we track this then we can skip comparisons! |
+ int lo = 0; |
+ int hi = srcize; |
+ while (hi - lo > 1) { |
+ int t = (lo + hi) / 2; |
+ if (::memcmp(srcbuf + I[t], keybuf, std::min(srcize - I[t], keysize)) < 0) |
+ lo = t; |
+ else |
+ hi = t; |
+ } |
+ |
+ int x = matchlen(srcbuf + I[lo], keybuf, std::min(srcize - I[lo], keysize)); |
+ int y = matchlen(srcbuf + I[hi], keybuf, std::min(srcize - I[hi], keysize)); |
+ if (x > y) { |
+ *pos = I[lo]; |
+ return x; |
+ } |
+ *pos = I[hi]; |
+ return y; |
+} |
+ |
+} // namespace bsdiff |