third_party/brotli/enc/hash.h - Issue 2537133002: Update brotli to v1.0.0-snapshot.

Unified Diff: third_party/brotli/enc/hash.h

Issue 2537133002: Update brotli to v1.0.0-snapshot. (Closed)

Patch Set: Fixed typo Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/brotli/enc/hash.h

diff --git a/third_party/brotli/enc/hash.h b/third_party/brotli/enc/hash.h

index 227be1d1f6f546da77e4f23a237b562fd791525e..0d8e61c002ac347223969bb0a5e1fc10e5599ae0 100644

--- a/third_party/brotli/enc/hash.h

+++ b/third_party/brotli/enc/hash.h

@@ -4,30 +4,32 @@

See file LICENSE for detail or copy at https://opensource.org/licenses/MIT

-// A (forgetful) hash table to the data seen by the compressor, to

-// help create backward references to previous data.

+/* A (forgetful) hash table to the data seen by the compressor, to

+ help create backward references to previous data. */

#ifndef BROTLI_ENC_HASH_H_

#define BROTLI_ENC_HASH_H_

-#include <sys/types.h>

-#include <algorithm>

-#include <cstring>

-#include <limits>

+#include <string.h> /* memcmp, memset */

+#include "../common/constants.h"

+#include "../common/dictionary.h"

+#include <brotli/types.h>

#include "./dictionary_hash.h"

#include "./fast_log.h"

#include "./find_match_length.h"

+#include "./memory.h"

#include "./port.h"

-#include "./prefix.h"

+#include "./quality.h"

#include "./static_dict.h"

-#include "./transform.h"

-#include "./types.h"

-namespace brotli {

+#if defined(__cplusplus) || defined(c_plusplus)

+extern "C" {

+#endif

-static const size_t kMaxTreeSearchDepth = 64;

-static const size_t kMaxTreeCompLength = 128;

+#define MAX_TREE_SEARCH_DEPTH 64

+#define MAX_TREE_COMP_LENGTH 128

+#define score_t size_t

static const uint32_t kDistanceCacheIndex[] = {

0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,

@@ -41,934 +43,686 @@ static const uint8_t kCutoffTransforms[] = {

0, 12, 27, 23, 42, 63, 56, 48, 59, 64

};

-// kHashMul32 multiplier has these properties:

-// * The multiplier must be odd. Otherwise we may lose the highest bit.

-// * No long streaks of 1s or 0s.

-// * There is no effort to ensure that it is a prime, the oddity is enough

-// for this use.

-// * The number has been tuned heuristically against compression benchmarks.

+typedef struct HasherSearchResult { /* Best match found so far by a search. */

+ size_t len; /* Length of the match. */

+ size_t len_x_code; /* == len ^ len_code */

+ size_t distance; /* Backward distance of the match. */

+ score_t score; /* Score of the match; a candidate scoring lower is rejected. */

+} HasherSearchResult;

+typedef struct DictionarySearchStatictics { /* Counters used to throttle static dictionary searches. */

+ size_t num_lookups; /* Static dictionary hash slots probed so far. */

+ size_t num_matches; /* Probes whose item was accepted as a match. */

+} DictionarySearchStatictics;

+/* kHashMul32 multiplier has these properties:

+ * The multiplier must be odd. Otherwise we may lose the highest bit.

+ * No long streaks of ones or zeros.

+ * There is no effort to ensure that it is a prime; being odd is enough

+ for this use.

+ * The number has been tuned heuristically against compression benchmarks. */

static const uint32_t kHashMul32 = 0x1e35a7bd;

-template<int kShiftBits>

-inline uint32_t Hash(const uint8_t *data) {

+static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { /* 14-bit hash of the value loaded by BROTLI_UNALIGNED_LOAD32(data). */

uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;

- // The higher bits contain more mixture from the multiplication,

- // so we take our results from there.

- return h >> (32 - kShiftBits);

+ /* The higher bits contain more mixture from the multiplication,

+ so we take our results from there. */

+ return h >> (32 - 14); /* Keep only the top 14 bits of the product. */

}

-// Usually, we always choose the longest backward reference. This function

-// allows for the exception of that rule.

-//

-// If we choose a backward reference that is further away, it will

-// usually be coded with more bits. We approximate this by assuming

-// log2(distance). If the distance can be expressed in terms of the

-// last four distances, we use some heuristic constants to estimate

-// the bits cost. For the first up to four literals we use the bit

-// cost of the literals from the literal cost model, after that we

-// use the average bit cost of the cost model.

-//

-// This function is used to sometimes discard a longer backward reference

-// when it is not much longer and the bit cost for encoding it is more

-// than the saved literals.

-//

-// backward_reference_offset MUST be positive.

-inline double BackwardReferenceScore(size_t copy_length,

- size_t backward_reference_offset) {

- return 5.4 * static_cast<double>(copy_length) -

- 1.20 * Log2FloorNonZero(backward_reference_offset);

+#define BROTLI_LITERAL_BYTE_SCORE 540

+#define BROTLI_DISTANCE_BIT_PENALTY 120

+/* Score must be positive after applying maximal penalty. */

+#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))

+/* Usually, we choose the longest backward reference. This function

+ allows for exceptions to that rule.

+ If we choose a backward reference that is further away, it will

+ usually be coded with more bits. We approximate this by assuming

+ log2(distance). If the distance can be expressed in terms of the

+ last four distances, we use some heuristic constants to estimate

+ the bits cost. For the first up to four literals we use the bit

+ cost of the literals from the literal cost model, after that we

+ use the average bit cost of the cost model.

+ This function is used to sometimes discard a longer backward reference

+ when it is not much longer and the bit cost for encoding it is more

+ than the saved literals.

+ The returned score is BROTLI_SCORE_BASE, plus BROTLI_LITERAL_BYTE_SCORE

+ per copied byte, minus BROTLI_DISTANCE_BIT_PENALTY times

+ Log2FloorNonZero(backward_reference_offset).

+ backward_reference_offset MUST be positive. */

+static BROTLI_INLINE score_t BackwardReferenceScore(

+ size_t copy_length, size_t backward_reference_offset) {

+ return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -

+ BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);

}

-inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length,

- size_t distance_short_code) {

- static const double kDistanceShortCodeBitCost[16] = {

- -0.6, 0.95, 1.17, 1.27,

- 0.93, 0.93, 0.96, 0.96, 0.99, 0.99,

- 1.05, 1.05, 1.15, 1.15, 1.25, 1.25

- };

- return 5.4 * static_cast<double>(copy_length) -

- kDistanceShortCodeBitCost[distance_short_code];

-struct BackwardMatch {

- BackwardMatch(void) : distance(0), length_and_code(0) {}

- BackwardMatch(size_t dist, size_t len)

- : distance(static_cast<uint32_t>(dist))

- , length_and_code(static_cast<uint32_t>(len << 5)) {}

- BackwardMatch(size_t dist, size_t len, size_t len_code)

- : distance(static_cast<uint32_t>(dist))

- , length_and_code(static_cast<uint32_t>(

- (len << 5) | (len == len_code ? 0 : len_code))) {}

- size_t length(void) const {

- return length_and_code >> 5;

- }

- size_t length_code(void) const {

- size_t code = length_and_code & 31;

- return code ? code : length();

- }

- uint32_t distance;

- uint32_t length_and_code;

+/* Score term for each distance short code, indexed by the short code.

+ Each entry is BROTLI_SCORE_BASE minus 100 times the bit cost that the

+ previous floating-point table assigned to the same code. Code 0 repeats

+ the last distance, codes 1..3 reuse the 2nd..4th last distance, and codes

+ 4..9 and 10..15 apply an offset to the last and 2nd last distance

+ respectively (see kDistanceCacheIndex).

+ NOTE(review): assumes BROTLI_NUM_DISTANCE_SHORT_CODES is 16, matching the

+ size of the previous table - confirm against common/constants.h. */

+static const score_t kDistanceShortCodeCost[BROTLI_NUM_DISTANCE_SHORT_CODES] = {

+ /* Repeat last */

+ BROTLI_SCORE_BASE + 60,

+ /* 2nd, 3rd, 4th last */

+ BROTLI_SCORE_BASE - 95,

+ BROTLI_SCORE_BASE - 117,

+ BROTLI_SCORE_BASE - 127,

+ /* Last with offset */

+ BROTLI_SCORE_BASE - 93,

+ BROTLI_SCORE_BASE - 93,

+ BROTLI_SCORE_BASE - 96,

+ BROTLI_SCORE_BASE - 96,

+ BROTLI_SCORE_BASE - 99,

+ BROTLI_SCORE_BASE - 99,

+ /* 2nd last with offset */

+ BROTLI_SCORE_BASE - 105,

+ BROTLI_SCORE_BASE - 105,

+ BROTLI_SCORE_BASE - 115,

+ BROTLI_SCORE_BASE - 115,

+ BROTLI_SCORE_BASE - 125,

+ BROTLI_SCORE_BASE - 125

};

-// A (forgetful) hash table to the data seen by the compressor, to

-// help create backward references to previous data.

-//

-// This is a hash map of fixed size (kBucketSize). Starting from the

-// given index, kBucketSweep buckets are used to store values of a key.

-template <int kBucketBits, int kBucketSweep, bool kUseDictionary>

-class HashLongestMatchQuickly {

- public:

- HashLongestMatchQuickly(void) {

- Reset();

- }

- void Reset(void) {

- need_init_ = true;

- num_dict_lookups_ = 0;

- num_dict_matches_ = 0;

- }

- void Init(void) {

- if (need_init_) {

- // It is not strictly necessary to fill this buffer here, but

- // not filling will make the results of the compression stochastic

- // (but correct). This is because random data would cause the

- // system to find accidentally good backward references here and there.

- memset(&buckets_[0], 0, sizeof(buckets_));

- need_init_ = false;

- }

- void InitForData(const uint8_t* data, size_t num) {

- for (size_t i = 0; i < num; ++i) {

- const uint32_t key = HashBytes(&data[i]);

- memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0]));

- need_init_ = false;

- }

- // Look at 4 bytes at data.

- // Compute a hash from these, and store the value somewhere within

- // [ix .. ix+3].

- inline void Store(const uint8_t *data, const uint32_t ix) {

- const uint32_t key = HashBytes(data);

- // Wiggle the value with the bucket sweep range.

- const uint32_t off = (ix >> 3) % kBucketSweep;

- buckets_[key + off] = ix;

- }

- // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]

- // up to the length of max_length and stores the position cur_ix in the

- // hash table.

- //

- // Does not look for matches longer than max_length.

- // Does not look for matches further away than max_backward.

- // Writes the best found match length into best_len_out.

- // Writes the index (&data[index]) of the start of the best match into

- // best_distance_out.

- inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,

- const size_t ring_buffer_mask,

- const int* __restrict distance_cache,

- const size_t cur_ix,

- const size_t max_length,

- const size_t max_backward,

- size_t * __restrict best_len_out,

- size_t * __restrict best_len_code_out,

- size_t * __restrict best_distance_out,

- double* __restrict best_score_out) {

- const size_t best_len_in = *best_len_out;

- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;

- const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);

- int compare_char = ring_buffer[cur_ix_masked + best_len_in];

- double best_score = *best_score_out;

- size_t best_len = best_len_in;

- size_t cached_backward = static_cast<size_t>(distance_cache[0]);

- size_t prev_ix = cur_ix - cached_backward;

- bool match_found = false;

- if (prev_ix < cur_ix) {

- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);

- if (compare_char == ring_buffer[prev_ix + best_len]) {

- size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],

- &ring_buffer[cur_ix_masked],

- max_length);

- if (len >= 4) {

- best_score = BackwardReferenceScoreUsingLastDistance(len, 0);

- best_len = len;

- *best_len_out = len;

- *best_len_code_out = len;

- *best_distance_out = cached_backward;

- *best_score_out = best_score;

- compare_char = ring_buffer[cur_ix_masked + best_len];

- if (kBucketSweep == 1) {

- buckets_[key] = static_cast<uint32_t>(cur_ix);

- return true;

- } else {

- match_found = true;

- }

- if (kBucketSweep == 1) {

- // Only one to look for, don't bother to prepare for a loop.

- prev_ix = buckets_[key];

- buckets_[key] = static_cast<uint32_t>(cur_ix);

- size_t backward = cur_ix - prev_ix;

- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);

- if (compare_char != ring_buffer[prev_ix + best_len_in]) {

- return false;

- }

- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {

- return false;

- }

- const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],

- &ring_buffer[cur_ix_masked],

- max_length);

- if (len >= 4) {

- *best_len_out = len;

- *best_len_code_out = len;

- *best_distance_out = backward;

- *best_score_out = BackwardReferenceScore(len, backward);

- return true;

- }

- } else {

- uint32_t *bucket = buckets_ + key;

- prev_ix = *bucket++;

- for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {

- const size_t backward = cur_ix - prev_ix;

- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);

- if (compare_char != ring_buffer[prev_ix + best_len]) {

- continue;

- }

- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {

- continue;

- }

- const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],

- &ring_buffer[cur_ix_masked],

- max_length);

- if (len >= 4) {

- const double score = BackwardReferenceScore(len, backward);

- if (best_score < score) {

- best_score = score;

- best_len = len;

- *best_len_out = best_len;

- *best_len_code_out = best_len;

- *best_distance_out = backward;

- *best_score_out = score;

- compare_char = ring_buffer[cur_ix_masked + best_len];

- match_found = true;

- }

- if (kUseDictionary && !match_found &&

- num_dict_matches_ >= (num_dict_lookups_ >> 7)) {

- ++num_dict_lookups_;

- const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;

- const uint16_t v = kStaticDictionaryHash[dict_key];

- if (v > 0) {

- const uint32_t len = v & 31;

- const uint32_t dist = v >> 5;

- const size_t offset =

- kBrotliDictionaryOffsetsByLength[len] + len * dist;

- if (len <= max_length) {

- const size_t matchlen =

- FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],

- &kBrotliDictionary[offset], len);

- if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {

- const size_t transform_id = kCutoffTransforms[len - matchlen];

- const size_t word_id =

- transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +

- dist;

- const size_t backward = max_backward + word_id + 1;

- const double score = BackwardReferenceScore(matchlen, backward);

- if (best_score < score) {

- ++num_dict_matches_;

- best_score = score;

- best_len = matchlen;

- *best_len_out = best_len;

- *best_len_code_out = len;

- *best_distance_out = backward;

- *best_score_out = best_score;

- match_found = true;

- }

- const uint32_t off = (cur_ix >> 3) % kBucketSweep;

- buckets_[key + off] = static_cast<uint32_t>(cur_ix);

- return match_found;

- }

- enum { kHashLength = 5 };

- enum { kHashTypeLength = 8 };

- // HashBytes is the function that chooses the bucket to place

- // the address in. The HashLongestMatch and HashLongestMatchQuickly

- // classes have separate, different implementations of hashing.

- static uint32_t HashBytes(const uint8_t *data) {

- // Computing a hash based on 5 bytes works much better for

- // qualities 1 and 3, where the next hash value is likely to replace

- uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;

- // The higher bits contain more mixture from the multiplication,

- // so we take our results from there.

- return static_cast<uint32_t>(h >> (64 - kBucketBits));

- }

- enum { kHashMapSize = 4 << kBucketBits };

- private:

- static const uint32_t kBucketSize = 1 << kBucketBits;

- uint32_t buckets_[kBucketSize + kBucketSweep];

- // True if buckets_ array needs to be initialized.

- bool need_init_;

- size_t num_dict_lookups_;

- size_t num_dict_matches_;

-};

+static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(

+ size_t copy_length, size_t distance_short_code) {

+ return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +

+ kDistanceShortCodeCost[distance_short_code];

-// A (forgetful) hash table to the data seen by the compressor, to

-// help create backward references to previous data.

-//

-// This is a hash map of fixed size (kBucketSize) to a ring buffer of

-// fixed size (kBlockSize). The ring buffer contains the last kBlockSize

-// index positions of the given hash key in the compressed data.

-template <int kBucketBits,

- int kBlockBits,

- int kNumLastDistancesToCheck>

-class HashLongestMatch {

- public:

- HashLongestMatch(void) {

- Reset();

- }

+static BROTLI_INLINE void DictionarySearchStaticticsReset(

+ DictionarySearchStatictics* self) {

+ self->num_lookups = 0;

+ self->num_matches = 0;

- void Reset(void) {

- need_init_ = true;

- num_dict_lookups_ = 0;

- num_dict_matches_ = 0;

- }

+static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(

+ size_t item, const uint8_t* data, size_t max_length, size_t max_backward,

+ HasherSearchResult* out) {

+ size_t len;

+ size_t dist;

+ size_t offset;

+ size_t matchlen;

+ size_t backward;

+ score_t score;

+ len = item & 31;

+ dist = item >> 5;

+ offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;

+ if (len > max_length) {

+ return BROTLI_FALSE;

+ }

+ matchlen = FindMatchLengthWithLimit(data, &kBrotliDictionary[offset], len);

+ if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {

+ return BROTLI_FALSE;

+ }

+ {

+ size_t transform_id = kCutoffTransforms[len - matchlen];

+ backward = max_backward + dist + 1 +

+ (transform_id << kBrotliDictionarySizeBitsByLength[len]);

+ }

+ score = BackwardReferenceScore(matchlen, backward);

+ if (score < out->score) {

+ return BROTLI_FALSE;

+ }

+ out->len = matchlen;

+ out->len_x_code = len ^ matchlen;

+ out->distance = backward;

+ out->score = score;

+ return BROTLI_TRUE;

- void Init(void) {

- if (need_init_) {

- memset(&num_[0], 0, sizeof(num_));

- need_init_ = false;

+static BROTLI_INLINE BROTLI_BOOL SearchInStaticDictionary(

+ DictionarySearchStatictics* self, const uint8_t* data, size_t max_length,

+ size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {

+ size_t key;

+ size_t i;

+ BROTLI_BOOL is_match_found = BROTLI_FALSE;

+ if (self->num_matches < (self->num_lookups >> 7)) {

+ return BROTLI_FALSE;

+ }

+ key = Hash14(data) << 1;

+ for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {

+ size_t item = kStaticDictionaryHash[key];

+ self->num_lookups++;

+ if (item != 0 &&

+ TestStaticDictionaryItem(item, data, max_length, max_backward, out)) {

+ self->num_matches++;

+ is_match_found = BROTLI_TRUE;

}

+ return is_match_found;

- void InitForData(const uint8_t* data, size_t num) {

- for (size_t i = 0; i < num; ++i) {

- const uint32_t key = HashBytes(&data[i]);

- num_[key] = 0;

- need_init_ = false;

- }

+typedef struct BackwardMatch { /* Compact record of one match: distance plus packed length and length code. */

+ uint32_t distance; /* Backward distance, stored as uint32_t. */

+ uint32_t length_and_code; /* (length << 5) | length code; the low 5 bits are 0 when the code equals the length. */

+} BackwardMatch;

- // Look at 3 bytes at data.

- // Compute a hash from these, and store the value of ix at that position.

- inline void Store(const uint8_t *data, const uint32_t ix) {

- const uint32_t key = HashBytes(data);

- const int minor_ix = num_[key] & kBlockMask;

- buckets_[key][minor_ix] = ix;

- ++num_[key];

- }

+static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,

+ size_t dist, size_t len) {

+ self->distance = (uint32_t)dist;

+ self->length_and_code = (uint32_t)(len << 5);

- // Find a longest backward match of &data[cur_ix] up to the length of

- // max_length and stores the position cur_ix in the hash table.

- //

- // Does not look for matches longer than max_length.

- // Does not look for matches further away than max_backward.

- // Writes the best found match length into best_len_out.

- // Writes the index (&data[index]) offset from the start of the best match

- // into best_distance_out.

- // Write the score of the best match into best_score_out.

- bool FindLongestMatch(const uint8_t * __restrict data,

- const size_t ring_buffer_mask,

- const int* __restrict distance_cache,

- const size_t cur_ix,

- const size_t max_length,

- const size_t max_backward,

- size_t * __restrict best_len_out,

- size_t * __restrict best_len_code_out,

- size_t * __restrict best_distance_out,

- double * __restrict best_score_out) {

- *best_len_code_out = 0;

- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;

- bool match_found = false;

- // Don't accept a short copy from far away.

- double best_score = *best_score_out;

- size_t best_len = *best_len_out;

- *best_len_out = 0;

- // Try last distance first.

- for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {

- const size_t idx = kDistanceCacheIndex[i];

- const size_t backward =

- static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);

- size_t prev_ix = static_cast<size_t>(cur_ix - backward);

- if (prev_ix >= cur_ix) {

- continue;

- }

- if (PREDICT_FALSE(backward > max_backward)) {

- continue;

- }

- prev_ix &= ring_buffer_mask;

+static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,

+ size_t dist, size_t len, size_t len_code) {

+ self->distance = (uint32_t)dist;

+ self->length_and_code =

+ (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));

- if (cur_ix_masked + best_len > ring_buffer_mask ||

- prev_ix + best_len > ring_buffer_mask ||

- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {

- continue;

- }

- const size_t len = FindMatchLengthWithLimit(&data[prev_ix],

- &data[cur_ix_masked],

- max_length);

- if (len >= 3 || (len == 2 && i < 2)) {

- // Comparing for >= 2 does not change the semantics, but just saves for

- // a few unnecessary binary logarithms in backward reference score,

- // since we are not interested in such short matches.

- double score = BackwardReferenceScoreUsingLastDistance(len, i);

- if (best_score < score) {

- best_score = score;

- best_len = len;

- *best_len_out = best_len;

- *best_len_code_out = best_len;

- *best_distance_out = backward;

- *best_score_out = best_score;

- match_found = true;

- }

- const uint32_t key = HashBytes(&data[cur_ix_masked]);

- const uint32_t * __restrict const bucket = &buckets_[key][0];

- const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;

- for (size_t i = num_[key]; i > down;) {

- --i;

- size_t prev_ix = bucket[i & kBlockMask];

- const size_t backward = cur_ix - prev_ix;

- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {

- break;

- }

- prev_ix &= ring_buffer_mask;

- if (cur_ix_masked + best_len > ring_buffer_mask ||

- prev_ix + best_len > ring_buffer_mask ||

- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {

- continue;

- }

- const size_t len = FindMatchLengthWithLimit(&data[prev_ix],

- &data[cur_ix_masked],

- max_length);

- if (len >= 4) {

- // Comparing for >= 3 does not change the semantics, but just saves

- // for a few unnecessary binary logarithms in backward reference

- // score, since we are not interested in such short matches.

- double score = BackwardReferenceScore(len, backward);

- if (best_score < score) {

- best_score = score;

- best_len = len;

- *best_len_out = best_len;

- *best_len_code_out = best_len;

- *best_distance_out = backward;

- *best_score_out = best_score;

- match_found = true;

- }

- buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);

- ++num_[key];

- if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {

- size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;

- for (int k = 0; k < 2; ++k, ++dict_key) {

- ++num_dict_lookups_;

- const uint16_t v = kStaticDictionaryHash[dict_key];

- if (v > 0) {

- const size_t len = v & 31;

- const size_t dist = v >> 5;

- const size_t offset =

- kBrotliDictionaryOffsetsByLength[len] + len * dist;

- if (len <= max_length) {

- const size_t matchlen =

- FindMatchLengthWithLimit(&data[cur_ix_masked],

- &kBrotliDictionary[offset], len);

- if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {

- const size_t transform_id = kCutoffTransforms[len - matchlen];

- const size_t word_id =

- transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +

- dist;

- const size_t backward = max_backward + word_id + 1;

- double score = BackwardReferenceScore(matchlen, backward);

- if (best_score < score) {

- ++num_dict_matches_;

- best_score = score;

- best_len = matchlen;

- *best_len_out = best_len;

- *best_len_code_out = len;

- *best_distance_out = backward;

- *best_score_out = best_score;

- match_found = true;

- }

- return match_found;

- }

+static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {

+ return self->length_and_code >> 5;

- // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the

- // length of max_length and stores the position cur_ix in the hash table.

- //

- // Sets *num_matches to the number of matches found, and stores the found

- // matches in matches[0] to matches[*num_matches - 1]. The matches will be

- // sorted by strictly increasing length and (non-strictly) increasing

- // distance.

- size_t FindAllMatches(const uint8_t* data,

- const size_t ring_buffer_mask,

- const size_t cur_ix,

- const size_t max_length,

- const size_t max_backward,

- BackwardMatch* matches) {

- BackwardMatch* const orig_matches = matches;

- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;

- size_t best_len = 1;

- size_t stop = cur_ix - 64;

- if (cur_ix < 64) { stop = 0; }

- for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {

- size_t prev_ix = i;

- const size_t backward = cur_ix - prev_ix;

- if (PREDICT_FALSE(backward > max_backward)) {

- break;

- }

- prev_ix &= ring_buffer_mask;

- if (data[cur_ix_masked] != data[prev_ix] ||

- data[cur_ix_masked + 1] != data[prev_ix + 1]) {

- continue;

- }

- const size_t len =

- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],

- max_length);

- if (len > best_len) {

- best_len = len;

- *matches++ = BackwardMatch(backward, len);

- }

- const uint32_t key = HashBytes(&data[cur_ix_masked]);

- const uint32_t * __restrict const bucket = &buckets_[key][0];

- const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;

- for (size_t i = num_[key]; i > down;) {

- --i;

- size_t prev_ix = bucket[i & kBlockMask];

- const size_t backward = cur_ix - prev_ix;

- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {

- break;

- }

- prev_ix &= ring_buffer_mask;

- if (cur_ix_masked + best_len > ring_buffer_mask ||

- prev_ix + best_len > ring_buffer_mask ||

- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {

- continue;

- }

- const size_t len =

- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],

- max_length);

- if (len > best_len) {

- best_len = len;

- *matches++ = BackwardMatch(backward, len);

- }

- buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);

- ++num_[key];

- uint32_t dict_matches[kMaxDictionaryMatchLen + 1];

- for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {

- dict_matches[i] = kInvalidMatch;

- }

- size_t minlen = std::max<size_t>(4, best_len + 1);

- if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,

- &dict_matches[0])) {

- size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);

- for (size_t l = minlen; l <= maxlen; ++l) {

- uint32_t dict_id = dict_matches[l];

- if (dict_id < kInvalidMatch) {

- *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,

- dict_id & 31);

- }

- return static_cast<size_t>(matches - orig_matches);

- }

+static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {

+ size_t code = self->length_and_code & 31;

+ return code ? code : BackwardMatchLength(self);

- enum { kHashLength = 4 };

- enum { kHashTypeLength = 4 };

- // HashBytes is the function that chooses the bucket to place

- // the address in. The HashLongestMatch and HashLongestMatchQuickly

- // classes have separate, different implementations of hashing.

- static uint32_t HashBytes(const uint8_t *data) {

- uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;

- // The higher bits contain more mixture from the multiplication,

- // so we take our results from there.

- return h >> (32 - kBucketBits);

- }

+#define EXPAND_CAT(a, b) CAT(a, b)

+#define CAT(a, b) a ## b

+#define FN(X) EXPAND_CAT(X, HASHER())

- enum { kHashMapSize = 2 << kBucketBits };

+#define MAX_NUM_MATCHES_H10 (64 + MAX_TREE_SEARCH_DEPTH)

- static const size_t kMaxNumMatches = 64 + (1 << kBlockBits);

+#define HASHER() H10

+#define HashToBinaryTree HASHER()

- private:

- // Number of hash buckets.

- static const uint32_t kBucketSize = 1 << kBucketBits;

+#define BUCKET_BITS 17

+#define BUCKET_SIZE (1 << BUCKET_BITS)

- // Only kBlockSize newest backward references are kept,

- // and the older are forgotten.

- static const uint32_t kBlockSize = 1 << kBlockBits;

+static size_t FN(HashTypeLength)(void) { return 4; } /* NOTE(review): presumably the number of input bytes FN(HashBytes) hashes - confirm. */

+static size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; } /* NOTE(review): presumably how many bytes past a position must be available before it is stored - confirm against callers. */

- // Mask for accessing entries in a block (in a ringbuffer manner).

- static const uint32_t kBlockMask = (1 << kBlockBits) - 1;

+static uint32_t FN(HashBytes)(const uint8_t *data) {

+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;

+ /* The higher bits contain more mixture from the multiplication,

+ so we take our results from there. */

+ return h >> (32 - BUCKET_BITS);

- // Number of entries in a particular bucket.

- uint16_t num_[kBucketSize];

+/* A (forgetful) hash table where each hash bucket contains a binary tree of

+ sequences whose first 4 bytes share the same hash code.

+ Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting

+ position in the input data. The binary tree is sorted by the lexicographic

+ order of the sequences, and it is also a max-heap with respect to the

+ starting positions. */

+typedef struct HashToBinaryTree {

+ /* The window size minus 1 */

+ size_t window_mask_;

- // Buckets containing kBlockSize of backward references.

- uint32_t buckets_[kBucketSize][kBlockSize];

+ /* Hash table that maps the 4-byte hashes of the sequence to the last

+ position where this hash was found, which is the root of the binary

+ tree of sequences that share this hash bucket. */

+ uint32_t buckets_[BUCKET_SIZE];

- // True if num_ array needs to be initialized.

- bool need_init_;

+ /* The union of the binary trees of each hash bucket. The root of the tree

+ corresponding to a hash is a sequence starting at buckets_[hash] and

+ the left and right children of a sequence starting at pos are

+ forest_[2 * pos] and forest_[2 * pos + 1]. */

+ uint32_t* forest_;

- size_t num_dict_lookups_;

- size_t num_dict_matches_;

-};

+ /* A position used to mark a non-existent sequence, i.e. a tree is empty if

+ its root is at invalid_pos_ and a node is a leaf if both its children

+ are at invalid_pos_. */

+ uint32_t invalid_pos_;

-// A (forgetful) hash table where each hash bucket contains a binary tree of

-// sequences whose first 4 bytes share the same hash code.

-// Each sequence is kMaxTreeCompLength long and is identified by its starting

-// position in the input data. The binary tree is sorted by the lexicographic

-// order of the sequences, and it is also a max-heap with respect to the

-// starting positions.

-class HashToBinaryTree {

- public:

- HashToBinaryTree() : forest_(NULL) {

- Reset();

- }

+ size_t forest_size_; /* Set to 0 by Initialize; NOTE(review): presumably the allocated size of forest_ - confirm in Init. */

+ BROTLI_BOOL is_dirty_; /* Set by Reset; Init rebuilds the table state only while this is set. */

+} HashToBinaryTree;

- ~HashToBinaryTree() {

- delete[] forest_;

- }

+static void FN(Reset)(HashToBinaryTree* self) {

+ self->is_dirty_ = BROTLI_TRUE;

- void Reset() {

- need_init_ = true;

- }

+static void FN(Initialize)(HashToBinaryTree* self) {

+ self->forest_ = NULL;

+ self->forest_size_ = 0;

+ FN(Reset)(self);

- void Init(int lgwin, size_t position, size_t bytes, bool is_last) {

- if (need_init_) {

- window_mask_ = (1u << lgwin) - 1u;

- invalid_pos_ = static_cast<uint32_t>(0 - window_mask_);

- for (uint32_t i = 0; i < kBucketSize; i++) {

- buckets_[i] = invalid_pos_;

- }

- size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1;

- forest_ = new uint32_t[2 * num_nodes];

- need_init_ = false;

- }

+static void FN(Cleanup)(MemoryManager* m, HashToBinaryTree* self) {

+ BROTLI_FREE(m, self->forest_);

- // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the

- // length of max_length and stores the position cur_ix in the hash table.

- //

- // Sets *num_matches to the number of matches found, and stores the found

- // matches in matches[0] to matches[*num_matches - 1]. The matches will be

- // sorted by strictly increasing length and (non-strictly) increasing

- // distance.

- size_t FindAllMatches(const uint8_t* data,

- const size_t ring_buffer_mask,

- const size_t cur_ix,

- const size_t max_length,

- const size_t max_backward,

- BackwardMatch* matches) {

- BackwardMatch* const orig_matches = matches;

- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;

- size_t best_len = 1;

- size_t stop = cur_ix - 64;

- if (cur_ix < 64) { stop = 0; }

- for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {

- size_t prev_ix = i;

- const size_t backward = cur_ix - prev_ix;

- if (PREDICT_FALSE(backward > max_backward)) {

- break;

- }

- prev_ix &= ring_buffer_mask;

- if (data[cur_ix_masked] != data[prev_ix] ||

- data[cur_ix_masked + 1] != data[prev_ix + 1]) {

- continue;

- }

- const size_t len =

- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],

- max_length);

- if (len > best_len) {

- best_len = len;

- *matches++ = BackwardMatch(backward, len);

- }

+static void FN(Init)(

+ MemoryManager* m, HashToBinaryTree* self, const uint8_t* data,

+ const BrotliEncoderParams* params, size_t position, size_t bytes,

+ BROTLI_BOOL is_last) {

+ if (self->is_dirty_) {

+ uint32_t invalid_pos;

+ size_t num_nodes;

+ uint32_t i;

+ BROTLI_UNUSED(data);

+ self->window_mask_ = (1u << params->lgwin) - 1u;

+ invalid_pos = (uint32_t)(0 - self->window_mask_);

+ self->invalid_pos_ = invalid_pos;

+ for (i = 0; i < BUCKET_SIZE; i++) {

+ self->buckets_[i] = invalid_pos;

}

- if (best_len < max_length) {

- matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,

- max_length, &best_len, matches);

- }

- uint32_t dict_matches[kMaxDictionaryMatchLen + 1];

- for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {

- dict_matches[i] = kInvalidMatch;

- }

- size_t minlen = std::max<size_t>(4, best_len + 1);

- if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,

- &dict_matches[0])) {

- size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);

- for (size_t l = minlen; l <= maxlen; ++l) {

- uint32_t dict_id = dict_matches[l];

- if (dict_id < kInvalidMatch) {

- *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,

- dict_id & 31);

- }

+ num_nodes = (position == 0 && is_last) ? bytes : self->window_mask_ + 1;

+ if (num_nodes > self->forest_size_) {

+ BROTLI_FREE(m, self->forest_);

+ self->forest_ = BROTLI_ALLOC(m, uint32_t, 2 * num_nodes);

+ if (BROTLI_IS_OOM(m)) return;

+ self->forest_size_ = num_nodes;

}

- return static_cast<size_t>(matches - orig_matches);

+ self->is_dirty_ = BROTLI_FALSE;

}

- // Stores the hash of the next 4 bytes and re-roots the binary tree at the

- // current sequence, without returning any matches.

- // REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block

- void Store(const uint8_t* data,

- const size_t ring_buffer_mask,

- const size_t cur_ix) {

- size_t best_len = 0;

- StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,

- &best_len, NULL);

- }

+static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,

+ const size_t pos) {

+ return 2 * (pos & self->window_mask_);

- void StitchToPreviousBlock(size_t num_bytes,

- size_t position,

- const uint8_t* ringbuffer,

- size_t ringbuffer_mask) {

- if (num_bytes >= 3 && position >= kMaxTreeCompLength) {

- // Store the last `kMaxTreeCompLength - 1` positions in the hasher.

- // These could not be calculated before, since they require knowledge

- // of both the previous and the current block.

- const size_t i_start = position - kMaxTreeCompLength + 1;

- const size_t i_end = std::min(position, i_start + num_bytes);

- for (size_t i = i_start; i < i_end; ++i) {

- // We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the

- // end of the current block and that we have at least

- // kMaxTreeCompLength tail in the ringbuffer.

- Store(ringbuffer, ringbuffer_mask, i);

- }

+static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self,

+ const size_t pos) {

+ return 2 * (pos & self->window_mask_) + 1;

- static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;

- private:

- // Stores the hash of the next 4 bytes and in a single tree-traversal, the

- // hash bucket's binary tree is searched for matches and is re-rooted at the

- // current position.

- //

- // If less than kMaxTreeCompLength data is available, the hash bucket of the

- // current position is searched for matches, but the state of the hash table

- // is not changed, since we can not know the final sorting order of the

- // current (incomplete) sequence.

- //

- // This function must be called with increasing cur_ix positions.

- BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data,

- const size_t cur_ix,

- const size_t ring_buffer_mask,

- const size_t max_length,

- size_t* const __restrict best_len,

- BackwardMatch* __restrict matches) {

- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;

- const size_t max_backward = window_mask_ - 15;

- const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength);

- const bool reroot_tree = max_length >= kMaxTreeCompLength;

- const uint32_t key = HashBytes(&data[cur_ix_masked]);

- size_t prev_ix = buckets_[key];

- // The forest index of the rightmost node of the left subtree of the new

- // root, updated as we traverse and reroot the tree of the hash bucket.

- size_t node_left = LeftChildIndex(cur_ix);

- // The forest index of the leftmost node of the right subtree of the new

- // root, updated as we traverse and reroot the tree of the hash bucket.

- size_t node_right = RightChildIndex(cur_ix);

- // The match length of the rightmost node of the left subtree of the new

- // root, updated as we traverse and reroot the tree of the hash bucket.

- size_t best_len_left = 0;

- // The match length of the leftmost node of the right subtree of the new

- // root, updated as we traverse and reroot the tree of the hash bucket.

- size_t best_len_right = 0;

- if (reroot_tree) {

- buckets_[key] = static_cast<uint32_t>(cur_ix);

- }

- for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) {

- const size_t backward = cur_ix - prev_ix;

- const size_t prev_ix_masked = prev_ix & ring_buffer_mask;

- if (backward == 0 || backward > max_backward || depth_remaining == 0) {

- if (reroot_tree) {

- forest_[node_left] = invalid_pos_;

- forest_[node_right] = invalid_pos_;

- }

- break;

+/* Stores the hash of the next 4 bytes and, in a single tree traversal,

+ searches the hash bucket's binary tree for matches and re-roots it at the

+ current position.

+ If fewer than MAX_TREE_COMP_LENGTH bytes of data are available, the hash

+ bucket of the current position is searched for matches, but the state of

+ the hash table is not changed, since we cannot know the final sorting order

+ of the current (incomplete) sequence.

+ This function must be called with increasing cur_ix positions. */

+static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(

+ HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data,

+ const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,

+ const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,

+ BackwardMatch* BROTLI_RESTRICT matches) {

+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;

+ const size_t max_comp_len =

+ BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);

+ const BROTLI_BOOL should_reroot_tree =

+ TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);

+ const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);

+ size_t prev_ix = self->buckets_[key];

+ /* The forest index of the rightmost node of the left subtree of the new

+ root, updated as we traverse and re-root the tree of the hash bucket. */

+ size_t node_left = FN(LeftChildIndex)(self, cur_ix);

+ /* The forest index of the leftmost node of the right subtree of the new

+ root, updated as we traverse and re-root the tree of the hash bucket. */

+ size_t node_right = FN(RightChildIndex)(self, cur_ix);

+ /* The match length of the rightmost node of the left subtree of the new

+ root, updated as we traverse and re-root the tree of the hash bucket. */

+ size_t best_len_left = 0;

+ /* The match length of the leftmost node of the right subtree of the new

+ root, updated as we traverse and re-root the tree of the hash bucket. */

+ size_t best_len_right = 0;

+ size_t depth_remaining;

+ if (should_reroot_tree) {

+ self->buckets_[key] = (uint32_t)cur_ix;

+ }

+ for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {

+ const size_t backward = cur_ix - prev_ix;

+ const size_t prev_ix_masked = prev_ix & ring_buffer_mask;

+ if (backward == 0 || backward > max_backward || depth_remaining == 0) {

+ if (should_reroot_tree) {

+ self->forest_[node_left] = self->invalid_pos_;

+ self->forest_[node_right] = self->invalid_pos_;

}

- const size_t cur_len = std::min(best_len_left, best_len_right);

- const size_t len = cur_len +

+ break;

+ }

+ {

+ const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);

+ size_t len;

+ assert(cur_len <= MAX_TREE_COMP_LENGTH);

+ len = cur_len +

FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],

&data[prev_ix_masked + cur_len],

max_length - cur_len);

- if (len > *best_len) {

+ assert(0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));

+ if (matches && len > *best_len) {

*best_len = len;

- if (matches) {

- *matches++ = BackwardMatch(backward, len);

- }

- if (len >= max_comp_len) {

- if (reroot_tree) {

- forest_[node_left] = forest_[LeftChildIndex(prev_ix)];

- forest_[node_right] = forest_[RightChildIndex(prev_ix)];

- }

- break;

+ InitBackwardMatch(matches++, backward, len);

+ }

+ if (len >= max_comp_len) {

+ if (should_reroot_tree) {

+ self->forest_[node_left] =

+ self->forest_[FN(LeftChildIndex)(self, prev_ix)];

+ self->forest_[node_right] =

+ self->forest_[FN(RightChildIndex)(self, prev_ix)];

}

+ break;

}

if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {

best_len_left = len;

- if (reroot_tree) {

- forest_[node_left] = static_cast<uint32_t>(prev_ix);

+ if (should_reroot_tree) {

+ self->forest_[node_left] = (uint32_t)prev_ix;

}

- node_left = RightChildIndex(prev_ix);

- prev_ix = forest_[node_left];

+ node_left = FN(RightChildIndex)(self, prev_ix);

+ prev_ix = self->forest_[node_left];

} else {

best_len_right = len;

- if (reroot_tree) {

- forest_[node_right] = static_cast<uint32_t>(prev_ix);

+ if (should_reroot_tree) {

+ self->forest_[node_right] = (uint32_t)prev_ix;

}

- node_right = LeftChildIndex(prev_ix);

- prev_ix = forest_[node_right];

+ node_right = FN(LeftChildIndex)(self, prev_ix);

+ prev_ix = self->forest_[node_right];

}

- return matches;

- }

- inline size_t LeftChildIndex(const size_t pos) {

- return 2 * (pos & window_mask_);

}

+ return matches;

- inline size_t RightChildIndex(const size_t pos) {

- return 2 * (pos & window_mask_) + 1;

+/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the

+ length of max_length and stores the position cur_ix in the hash table.

+ Returns the number of matches found, and stores the found matches in

+ matches[0] to matches[count - 1], where count is the returned value. The

+ matches will be sorted by strictly increasing length and (non-strictly)

+ increasing distance. */

+static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,

+ const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix,

+ const size_t max_length, const size_t max_backward,

+ const BrotliEncoderParams* params, BackwardMatch* matches) {

+ BackwardMatch* const orig_matches = matches;

+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;

+ size_t best_len = 1;

+ const size_t short_match_max_backward =

+ params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;

+ size_t stop = cur_ix - short_match_max_backward;

+ uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];

+ size_t i;

+ if (cur_ix < short_match_max_backward) { stop = 0; }

+ for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {

+ size_t prev_ix = i;

+ const size_t backward = cur_ix - prev_ix;

+ if (BROTLI_PREDICT_FALSE(backward > max_backward)) {

+ break;

+ }

+ prev_ix &= ring_buffer_mask;

+ if (data[cur_ix_masked] != data[prev_ix] ||

+ data[cur_ix_masked + 1] != data[prev_ix + 1]) {

+ continue;

+ }

+ {

+ const size_t len =

+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],

+ max_length);

+ if (len > best_len) {

+ best_len = len;

+ InitBackwardMatch(matches++, backward, len);

+ }

}

- static uint32_t HashBytes(const uint8_t *data) {

- uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;

- // The higher bits contain more mixture from the multiplication,

- // so we take our results from there.

- return h >> (32 - kBucketBits);

+ if (best_len < max_length) {

+ matches = FN(StoreAndFindMatches)(self, data, cur_ix, ring_buffer_mask,

+ max_length, max_backward, &best_len, matches);

+ }

+ for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {

+ dict_matches[i] = kInvalidMatch;

+ }

+ {

+ size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);

+ if (BrotliFindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen,

+ max_length, &dict_matches[0])) {

+ size_t maxlen = BROTLI_MIN(

+ size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);

+ size_t l;

+ for (l = minlen; l <= maxlen; ++l) {

+ uint32_t dict_id = dict_matches[l];

+ if (dict_id < kInvalidMatch) {

+ InitDictionaryBackwardMatch(matches++,

+ max_backward + (dict_id >> 5) + 1, l, dict_id & 31);

+ }

}

+ return (size_t)(matches - orig_matches);

- static const int kBucketBits = 17;

- static const size_t kBucketSize = 1 << kBucketBits;

- // The window size minus 1

- size_t window_mask_;

- // Hash table that maps the 4-byte hashes of the sequence to the last

- // position where this hash was found, which is the root of the binary

- // tree of sequences that share this hash bucket.

- uint32_t buckets_[kBucketSize];

- // The union of the binary trees of each hash bucket. The root of the tree

- // corresponding to a hash is a sequence starting at buckets_[hash] and

- // the left and right children of a sequence starting at pos are

- // forest_[2 * pos] and forest_[2 * pos + 1].

- uint32_t* forest_;

- // A position used to mark a non-existent sequence, i.e. a tree is empty if

- // its root is at invalid_pos_ and a node is a leaf if both its children

- // are at invalid_pos_.

- uint32_t invalid_pos_;

- bool need_init_;

-};

-struct Hashers {

- // For kBucketSweep == 1, enabling the dictionary lookup makes compression

- // a little faster (0.5% - 1%) and it compresses 0.15% better on small text

- // and html inputs.

- typedef HashLongestMatchQuickly<16, 1, true> H2;

- typedef HashLongestMatchQuickly<16, 2, false> H3;

- typedef HashLongestMatchQuickly<17, 4, true> H4;

- typedef HashLongestMatch<14, 4, 4> H5;

- typedef HashLongestMatch<14, 5, 4> H6;

- typedef HashLongestMatch<15, 6, 10> H7;

- typedef HashLongestMatch<15, 7, 10> H8;

- typedef HashLongestMatch<15, 8, 16> H9;

- typedef HashToBinaryTree H10;

- Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),

- hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {}

- ~Hashers(void) {

- delete hash_h2;

- delete hash_h3;

- delete hash_h4;

- delete hash_h5;

- delete hash_h6;

- delete hash_h7;

- delete hash_h8;

- delete hash_h9;

- delete hash_h10;

- }

+/* Stores the hash of the next 4 bytes and re-roots the binary tree at the

+ current sequence, without returning any matches.

+ REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */

+static BROTLI_INLINE void FN(Store)(HashToBinaryTree* self, const uint8_t *data,

+ const size_t mask, const size_t ix) {

+ /* Maximum distance is window size - 16, see section 9.1. of the spec. */

+ const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;

+ FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,

+ max_backward, NULL, NULL);

- void Init(int type) {

- switch (type) {

- case 2: hash_h2 = new H2; break;

- case 3: hash_h3 = new H3; break;

- case 4: hash_h4 = new H4; break;

- case 5: hash_h5 = new H5; break;

- case 6: hash_h6 = new H6; break;

- case 7: hash_h7 = new H7; break;

- case 8: hash_h8 = new H8; break;

- case 9: hash_h9 = new H9; break;

- case 10: hash_h10 = new H10; break;

- default: break;

+static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self,

+ const uint8_t *data, const size_t mask, const size_t ix_start,

+ const size_t ix_end) {

+ size_t i = ix_start;

+ size_t j = ix_start;

+ if (ix_start + 63 <= ix_end) {

+ i = ix_end - 63;

+ }

+ if (ix_start + 512 <= i) {

+ for (; j < i; j += 8) {

+ FN(Store)(self, data, mask, j);

}

+ for (; i < ix_end; ++i) {

+ FN(Store)(self, data, mask, i);

+ }

- template<typename Hasher>

- void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {

- hasher->Init();

- for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {

- hasher->Store(&dict[i], static_cast<uint32_t>(i));

+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashToBinaryTree* self,

+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,

+ size_t ringbuffer_mask) {

+ if (num_bytes >= FN(HashTypeLength)() - 1 &&

+ position >= MAX_TREE_COMP_LENGTH) {

+ /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.

+ These could not be calculated before, since they require knowledge

+ of both the previous and the current block. */

+ const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;

+ const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);

+ size_t i;

+ for (i = i_start; i < i_end; ++i) {

+ /* Maximum distance is window size - 16, see section 9.1. of the spec.

+ Furthermore, we have to make sure that we don't look further back

+ from the start of the next block than the window size, otherwise we

+ could access already overwritten areas of the ring-buffer. */

+ const size_t max_backward =

+ self->window_mask_ - BROTLI_MAX(size_t,

+ BROTLI_WINDOW_GAP - 1,

+ position - i);

+ /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the

+ end of the current block and that we have at least

+ MAX_TREE_COMP_LENGTH tail in the ring-buffer. */

+ FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,

+ MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);

}

- // Custom LZ77 window.

- void PrependCustomDictionary(

- int type, int lgwin, const size_t size, const uint8_t* dict) {

- switch (type) {

- case 2: WarmupHash(size, dict, hash_h2); break;

- case 3: WarmupHash(size, dict, hash_h3); break;

- case 4: WarmupHash(size, dict, hash_h4); break;

- case 5: WarmupHash(size, dict, hash_h5); break;

- case 6: WarmupHash(size, dict, hash_h6); break;

- case 7: WarmupHash(size, dict, hash_h7); break;

- case 8: WarmupHash(size, dict, hash_h8); break;

- case 9: WarmupHash(size, dict, hash_h9); break;

- case 10:

- hash_h10->Init(lgwin, 0, size, false);

- for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {

- hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);

- }

- break;

- default: break;

- }

+#undef BUCKET_SIZE

+#undef BUCKET_BITS

+#undef HASHER

+/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression

+ a little faster (0.5% - 1%) and it compresses 0.15% better on small text

+ and HTML inputs. */

+#define HASHER() H2

+#define BUCKET_BITS 16

+#define BUCKET_SWEEP 1

+#define USE_DICTIONARY 1

+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */

+#undef BUCKET_SWEEP

+#undef USE_DICTIONARY

+#undef HASHER

+#define HASHER() H3

+#define BUCKET_SWEEP 2

+#define USE_DICTIONARY 0

+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */

+#undef USE_DICTIONARY

+#undef BUCKET_SWEEP

+#undef BUCKET_BITS

+#undef HASHER

+#define HASHER() H4

+#define BUCKET_BITS 17

+#define BUCKET_SWEEP 4

+#define USE_DICTIONARY 1

+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */

+#undef USE_DICTIONARY

+#undef BUCKET_SWEEP

+#undef BUCKET_BITS

+#undef HASHER

+#define HASHER() H5

+#define BUCKET_BITS 14

+#define BLOCK_BITS 4

+#define NUM_LAST_DISTANCES_TO_CHECK 4

+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */

+#undef BLOCK_BITS

+#undef HASHER

+#define HASHER() H6

+#define BLOCK_BITS 5

+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */

+#undef NUM_LAST_DISTANCES_TO_CHECK

+#undef BLOCK_BITS

+#undef BUCKET_BITS

+#undef HASHER

+#define HASHER() H7

+#define BUCKET_BITS 15

+#define BLOCK_BITS 6

+#define NUM_LAST_DISTANCES_TO_CHECK 10

+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */

+#undef BLOCK_BITS

+#undef HASHER

+#define HASHER() H8

+#define BLOCK_BITS 7

+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */

+#undef NUM_LAST_DISTANCES_TO_CHECK

+#undef BLOCK_BITS

+#undef HASHER

+#define HASHER() H9

+#define BLOCK_BITS 8

+#define NUM_LAST_DISTANCES_TO_CHECK 16

+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */

+#undef NUM_LAST_DISTANCES_TO_CHECK

+#undef BLOCK_BITS

+#undef BUCKET_BITS

+#undef HASHER

+#define BUCKET_BITS 15

+#define NUM_LAST_DISTANCES_TO_CHECK 4

+#define NUM_BANKS 1

+#define BANK_BITS 16

+#define HASHER() H40

+#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */

+#undef HASHER

+#undef NUM_LAST_DISTANCES_TO_CHECK

+#define NUM_LAST_DISTANCES_TO_CHECK 10

+#define HASHER() H41

+#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */

+#undef HASHER

+#undef NUM_LAST_DISTANCES_TO_CHECK

+#undef NUM_BANKS

+#undef BANK_BITS

+#define NUM_LAST_DISTANCES_TO_CHECK 16

+#define NUM_BANKS 512

+#define BANK_BITS 9

+#define HASHER() H42

+#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */

+#undef HASHER

+#undef NUM_LAST_DISTANCES_TO_CHECK

+#undef NUM_BANKS

+#undef BANK_BITS

+#undef BUCKET_BITS

+#undef FN

+#undef CAT

+#undef EXPAND_CAT

+#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(7) H(8) H(9) \

+ H(40) H(41) H(42)

+#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)

+typedef struct Hashers {

+#define MEMBER_(N) H ## N* h ## N;

+ FOR_ALL_HASHERS(MEMBER_)

+#undef MEMBER_

+} Hashers;

+static BROTLI_INLINE void InitHashers(Hashers* self) {

+#define INIT_(N) self->h ## N = 0;

+ FOR_ALL_HASHERS(INIT_)

+#undef INIT_

+static BROTLI_INLINE void DestroyHashers(MemoryManager* m, Hashers* self) {

+ if (self->h10) CleanupH10(m, self->h10);

+#define CLEANUP_(N) BROTLI_FREE(m, self->h ## N)

+ FOR_ALL_HASHERS(CLEANUP_)

+#undef CLEANUP_

+static BROTLI_INLINE void HashersReset(Hashers* self, int type) {

+ switch (type) {

+#define RESET_(N) case N: ResetH ## N(self->h ## N); break;

+ FOR_ALL_HASHERS(RESET_)

+#undef RESET_

+ default: break;

}

+static BROTLI_INLINE void HashersSetup(

+ MemoryManager* m, Hashers* self, int type) {

+ switch (type) {

+#define SETUP_(N) case N: self->h ## N = BROTLI_ALLOC(m, H ## N, 1); break;

+ FOR_ALL_HASHERS(SETUP_)

+#undef SETUP_

+ default: break;

+ }

+ if (BROTLI_IS_OOM(m)) return;

+ if (type == 10) InitializeH10(self->h10);

+ HashersReset(self, type);

+#define WARMUP_HASH_(N) \

+static BROTLI_INLINE void WarmupHashH ## N(MemoryManager* m, \

+ const BrotliEncoderParams* params, const size_t size, const uint8_t* dict, \

+ H ## N* hasher) { \

+ size_t overlap = (StoreLookaheadH ## N()) - 1; \

+ size_t i; \

+ InitH ## N(m, hasher, dict, params, 0, size, BROTLI_FALSE); \

+ if (BROTLI_IS_OOM(m)) return; \

+ for (i = 0; i + overlap < size; i++) { \

+ StoreH ## N(hasher, dict, ~(size_t)0, i); \

+ } \

+FOR_ALL_HASHERS(WARMUP_HASH_)

+#undef WARMUP_HASH_

+/* Custom LZ77 window. */

+static BROTLI_INLINE void HashersPrependCustomDictionary(

+ MemoryManager* m, Hashers* self, const BrotliEncoderParams* params,

+ const size_t size, const uint8_t* dict) {

+ int hasher_type = ChooseHasher(params);

+ switch (hasher_type) {

+#define PREPEND_(N) \

+ case N: WarmupHashH ## N(m, params, size, dict, self->h ## N); break;

+ FOR_ALL_HASHERS(PREPEND_)

+#undef PREPEND_

+ default: break;

+ }

+ if (BROTLI_IS_OOM(m)) return;

- H2* hash_h2;

- H3* hash_h3;

- H4* hash_h4;

- H5* hash_h5;

- H6* hash_h6;

- H7* hash_h7;

- H8* hash_h8;

- H9* hash_h9;

- H10* hash_h10;

-};

-} // namespace brotli

+#if defined(__cplusplus) || defined(c_plusplus)

+} /* extern "C" */

+#endif

-#endif // BROTLI_ENC_HASH_H_

+#endif /* BROTLI_ENC_HASH_H_ */

« no previous file with comments | « third_party/brotli/enc/find_match_length.h ('k') | third_party/brotli/enc/hash_forgetful_chain_inc.h » ('j') | no next file with comments »