OLD | NEW |
1 /* Copyright 2010 Google Inc. All Rights Reserved. | 1 /* Copyright 2010 Google Inc. All Rights Reserved. |
2 | 2 |
3 Distributed under MIT license. | 3 Distributed under MIT license. |
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 */ | 5 */ |
6 | 6 |
7 // A (forgetful) hash table to the data seen by the compressor, to | 7 /* A (forgetful) hash table to the data seen by the compressor, to |
8 // help create backward references to previous data. | 8 help create backward references to previous data. */ |
9 | 9 |
10 #ifndef BROTLI_ENC_HASH_H_ | 10 #ifndef BROTLI_ENC_HASH_H_ |
11 #define BROTLI_ENC_HASH_H_ | 11 #define BROTLI_ENC_HASH_H_ |
12 | 12 |
13 #include <sys/types.h> | 13 #include <string.h> /* memcmp, memset */ |
14 #include <algorithm> | |
15 #include <cstring> | |
16 #include <limits> | |
17 | 14 |
| 15 #include "../common/constants.h" |
| 16 #include "../common/dictionary.h" |
| 17 #include <brotli/types.h> |
18 #include "./dictionary_hash.h" | 18 #include "./dictionary_hash.h" |
19 #include "./fast_log.h" | 19 #include "./fast_log.h" |
20 #include "./find_match_length.h" | 20 #include "./find_match_length.h" |
| 21 #include "./memory.h" |
21 #include "./port.h" | 22 #include "./port.h" |
22 #include "./prefix.h" | 23 #include "./quality.h" |
23 #include "./static_dict.h" | 24 #include "./static_dict.h" |
24 #include "./transform.h" | |
25 #include "./types.h" | |
26 | 25 |
27 namespace brotli { | 26 #if defined(__cplusplus) || defined(c_plusplus) |
| 27 extern "C" { |
| 28 #endif |
28 | 29 |
29 static const size_t kMaxTreeSearchDepth = 64; | 30 #define MAX_TREE_SEARCH_DEPTH 64 |
30 static const size_t kMaxTreeCompLength = 128; | 31 #define MAX_TREE_COMP_LENGTH 128 |
| 32 #define score_t size_t |
31 | 33 |
32 static const uint32_t kDistanceCacheIndex[] = { | 34 static const uint32_t kDistanceCacheIndex[] = { |
33 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, | 35 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, |
34 }; | 36 }; |
35 static const int kDistanceCacheOffset[] = { | 37 static const int kDistanceCacheOffset[] = { |
36 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3 | 38 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3 |
37 }; | 39 }; |
38 | 40 |
39 static const uint32_t kCutoffTransformsCount = 10; | 41 static const uint32_t kCutoffTransformsCount = 10; |
40 static const uint8_t kCutoffTransforms[] = { | 42 static const uint8_t kCutoffTransforms[] = { |
41 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 | 43 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 |
42 }; | 44 }; |
43 | 45 |
44 // kHashMul32 multiplier has these properties: | 46 typedef struct HasherSearchResult { |
45 // * The multiplier must be odd. Otherwise we may lose the highest bit. | 47 size_t len; |
46 // * No long streaks of 1s or 0s. | 48 size_t len_x_code; /* == len ^ len_code */ |
47 // * There is no effort to ensure that it is a prime, the oddity is enough | 49 size_t distance; |
48 // for this use. | 50 score_t score; |
49 // * The number has been tuned heuristically against compression benchmarks. | 51 } HasherSearchResult; |
| 52 |
| 53 typedef struct DictionarySearchStatictics { |
| 54 size_t num_lookups; |
| 55 size_t num_matches; |
| 56 } DictionarySearchStatictics; |
| 57 |
| 58 /* kHashMul32 multiplier has these properties: |
| 59 * The multiplier must be odd. Otherwise we may lose the highest bit. |
| 60 * No long streaks of ones or zeros. |
| 61 * There is no effort to ensure that it is a prime, the oddity is enough |
| 62 for this use. |
| 63 * The number has been tuned heuristically against compression benchmarks. */ |
50 static const uint32_t kHashMul32 = 0x1e35a7bd; | 64 static const uint32_t kHashMul32 = 0x1e35a7bd; |
51 | 65 |
52 template<int kShiftBits> | 66 static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { |
53 inline uint32_t Hash(const uint8_t *data) { | |
54 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; | 67 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; |
55 // The higher bits contain more mixture from the multiplication, | 68 /* The higher bits contain more mixture from the multiplication, |
56 // so we take our results from there. | 69 so we take our results from there. */ |
57 return h >> (32 - kShiftBits); | 70 return h >> (32 - 14); |
58 } | 71 } |
59 | 72 |
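The new Hash14 keeps the same multiply-and-shift scheme as the old templated Hash<kShiftBits>, only with the shift hard-coded to 14 bits. A minimal sketch of what it computes (hypothetical helper, not part of either side of the diff; it assumes BROTLI_UNALIGNED_LOAD32 behaves like a plain native-endian 32-bit load):

  #include <stdint.h>
  #include <string.h>

  /* Read 4 bytes, multiply by the odd constant, keep the top 14 bits of the
     low 32-bit product; the high bits carry the most mixing. */
  static uint32_t Hash14Sketch(const uint8_t* data) {
    uint32_t word;
    memcpy(&word, data, sizeof(word));  /* stand-in for BROTLI_UNALIGNED_LOAD32 */
    return (word * 0x1e35a7bdU) >> (32 - 14);  /* result in [0, 1 << 14) */
  }

Because kHashMul32 is odd, multiplication modulo 2^32 is invertible, so no input bits are lost before the final shift.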
60 // Usually, we always choose the longest backward reference. This function | 73 #define BROTLI_LITERAL_BYTE_SCORE 540 |
61 // allows for the exception of that rule. | 74 #define BROTLI_DISTANCE_BIT_PENALTY 120 |
62 // | 75 /* Score must be positive after applying maximal penalty. */ |
63 // If we choose a backward reference that is further away, it will | 76 #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t)) |
64 // usually be coded with more bits. We approximate this by assuming | 77 |
65 // log2(distance). If the distance can be expressed in terms of the | 78 /* Usually, we always choose the longest backward reference. This function |
66 // last four distances, we use some heuristic constants to estimate | 79 allows for the exception of that rule. |
67 // the bits cost. For the first up to four literals we use the bit | 80 |
68 // cost of the literals from the literal cost model, after that we | 81 If we choose a backward reference that is further away, it will |
69 // use the average bit cost of the cost model. | 82 usually be coded with more bits. We approximate this by assuming |
70 // | 83 log2(distance). If the distance can be expressed in terms of the |
71 // This function is used to sometimes discard a longer backward reference | 84 last four distances, we use some heuristic constants to estimate |
72 // when it is not much longer and the bit cost for encoding it is more | 85 the bits cost. For the first up to four literals we use the bit |
73 // than the saved literals. | 86 cost of the literals from the literal cost model, after that we |
74 // | 87 use the average bit cost of the cost model. |
75 // backward_reference_offset MUST be positive. | 88 |
76 inline double BackwardReferenceScore(size_t copy_length, | 89 This function is used to sometimes discard a longer backward reference |
77 size_t backward_reference_offset) { | 90 when it is not much longer and the bit cost for encoding it is more |
78 return 5.4 * static_cast<double>(copy_length) - | 91 than the saved literals. |
79 1.20 * Log2FloorNonZero(backward_reference_offset); | 92 |
80 } | 93 backward_reference_offset MUST be positive. */ |
81 | 94 static BROTLI_INLINE score_t BackwardReferenceScore( |
82 inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length, | 95 size_t copy_length, size_t backward_reference_offset) { |
83 size_t distance_short_code) { | 96 return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length - |
84 static const double kDistanceShortCodeBitCost[16] = { | 97 BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset); |
85 -0.6, 0.95, 1.17, 1.27, | 98 } |
86 0.93, 0.93, 0.96, 0.96, 0.99, 0.99, | 99 |
87 1.05, 1.05, 1.15, 1.15, 1.25, 1.25 | 100 static const score_t kDistanceShortCodeCost[BROTLI_NUM_DISTANCE_SHORT_CODES] = { |
88 }; | 101 /* Repeat last */ |
89 return 5.4 * static_cast<double>(copy_length) - | 102 BROTLI_SCORE_BASE + 60, |
90 kDistanceShortCodeBitCost[distance_short_code]; | 103 /* 2nd, 3rd, 4th last */ |
91 } | 104 BROTLI_SCORE_BASE - 95, |
92 | 105 BROTLI_SCORE_BASE - 117, |
93 struct BackwardMatch { | 106 BROTLI_SCORE_BASE - 127, |
94 BackwardMatch(void) : distance(0), length_and_code(0) {} | 107 /* Last with offset */ |
95 | 108 BROTLI_SCORE_BASE - 93, |
96 BackwardMatch(size_t dist, size_t len) | 109 BROTLI_SCORE_BASE - 93, |
97 : distance(static_cast<uint32_t>(dist)) | 110 BROTLI_SCORE_BASE - 96, |
98 , length_and_code(static_cast<uint32_t>(len << 5)) {} | 111 BROTLI_SCORE_BASE - 96, |
99 | 112 BROTLI_SCORE_BASE - 99, |
100 BackwardMatch(size_t dist, size_t len, size_t len_code) | 113 BROTLI_SCORE_BASE - 99, |
101 : distance(static_cast<uint32_t>(dist)) | 114 /* 2nd last with offset */ |
102 , length_and_code(static_cast<uint32_t>( | 115 BROTLI_SCORE_BASE - 105, |
103 (len << 5) | (len == len_code ? 0 : len_code))) {} | 116 BROTLI_SCORE_BASE - 105, |
104 | 117 BROTLI_SCORE_BASE - 115, |
105 size_t length(void) const { | 118 BROTLI_SCORE_BASE - 115, |
106 return length_and_code >> 5; | 119 BROTLI_SCORE_BASE - 125, |
107 } | 120 BROTLI_SCORE_BASE - 125 |
108 size_t length_code(void) const { | 121 }; |
109 size_t code = length_and_code & 31; | 122 |
110 return code ? code : length(); | 123 static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance( |
111 } | 124 size_t copy_length, size_t distance_short_code) { |
112 | 125 return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length + |
| 126 kDistanceShortCodeCost[distance_short_code]; |
| 127 } |
| 128 |
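The floating-point scoring on the left becomes pure integer arithmetic on the right: BROTLI_SCORE_BASE is 120 * 8 * sizeof(size_t) (7680 with a 64-bit size_t), each copied byte is worth 540 points, and each distance bit costs 120. A small sketch of the trade-off this encodes (hypothetical helper and example values, not part of the diff):

  #include <stddef.h>

  /* Same formula as BackwardReferenceScore above, with log2(distance) passed in. */
  static size_t ScoreSketch(size_t copy_length, size_t log2_distance) {
    return 120 * 8 * sizeof(size_t) + 540 * copy_length - 120 * log2_distance;
  }

  /* With a 64-bit size_t:
       ScoreSketch(6, 16) == 7680 + 3240 - 1920 ==  9000   (6 bytes, ~64 KiB back)
       ScoreSketch(5,  2) == 7680 + 2700 -  240 == 10140   (5 bytes, 4 bytes back)
     so the shorter but much closer match wins. */

As the comment in the diff notes, the BROTLI_SCORE_BASE offset keeps the score positive even under the maximal distance penalty.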
| 129 static BROTLI_INLINE void DictionarySearchStaticticsReset( |
| 130 DictionarySearchStatictics* self) { |
| 131 self->num_lookups = 0; |
| 132 self->num_matches = 0; |
| 133 } |
| 134 |
| 135 static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( |
| 136 size_t item, const uint8_t* data, size_t max_length, size_t max_backward, |
| 137 HasherSearchResult* out) { |
| 138 size_t len; |
| 139 size_t dist; |
| 140 size_t offset; |
| 141 size_t matchlen; |
| 142 size_t backward; |
| 143 score_t score; |
| 144 len = item & 31; |
| 145 dist = item >> 5; |
| 146 offset = kBrotliDictionaryOffsetsByLength[len] + len * dist; |
| 147 if (len > max_length) { |
| 148 return BROTLI_FALSE; |
| 149 } |
| 150 |
| 151 matchlen = FindMatchLengthWithLimit(data, &kBrotliDictionary[offset], len); |
| 152 if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) { |
| 153 return BROTLI_FALSE; |
| 154 } |
| 155 { |
| 156 size_t transform_id = kCutoffTransforms[len - matchlen]; |
| 157 backward = max_backward + dist + 1 + |
| 158 (transform_id << kBrotliDictionarySizeBitsByLength[len]); |
| 159 } |
| 160 score = BackwardReferenceScore(matchlen, backward); |
| 161 if (score < out->score) { |
| 162 return BROTLI_FALSE; |
| 163 } |
| 164 out->len = matchlen; |
| 165 out->len_x_code = len ^ matchlen; |
| 166 out->distance = backward; |
| 167 out->score = score; |
| 168 return BROTLI_TRUE; |
| 169 } |
| 170 |
| 171 static BROTLI_INLINE BROTLI_BOOL SearchInStaticDictionary( |
| 172 DictionarySearchStatictics* self, const uint8_t* data, size_t max_length, |
| 173 size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) { |
| 174 size_t key; |
| 175 size_t i; |
| 176 BROTLI_BOOL is_match_found = BROTLI_FALSE; |
| 177 if (self->num_matches < (self->num_lookups >> 7)) { |
| 178 return BROTLI_FALSE; |
| 179 } |
| 180 key = Hash14(data) << 1; |
| 181 for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) { |
| 182 size_t item = kStaticDictionaryHash[key]; |
| 183 self->num_lookups++; |
| 184 if (item != 0 && |
| 185 TestStaticDictionaryItem(item, data, max_length, max_backward, out)) { |
| 186 self->num_matches++; |
| 187 is_match_found = BROTLI_TRUE; |
| 188 } |
| 189 } |
| 190 return is_match_found; |
| 191 } |
| 192 |
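SearchInStaticDictionary and TestStaticDictionaryItem above depend on two conventions: how a kStaticDictionaryHash entry is packed, and how a dictionary word is expressed as a distance just beyond the regular window. A rough sketch of the packing (hypothetical helper name; the real tables live in the dictionary sources):

  /* A non-zero hash item packs a word length (low 5 bits) and a word index. */
  static void UnpackDictionaryItemSketch(size_t item,
                                         size_t* len, size_t* word_index) {
    *len = item & 31;
    *word_index = item >> 5;
  }

The word's bytes then start at kBrotliDictionaryOffsetsByLength[len] + len * word_index. When only matchlen of the len bytes match, kCutoffTransforms[len - matchlen] selects a transform whose id is folded into the distance, which is why the resulting backward value deliberately lands beyond max_backward.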
| 193 typedef struct BackwardMatch { |
113 uint32_t distance; | 194 uint32_t distance; |
114 uint32_t length_and_code; | 195 uint32_t length_and_code; |
115 }; | 196 } BackwardMatch; |
116 | 197 |
117 // A (forgetful) hash table to the data seen by the compressor, to | 198 static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self, |
118 // help create backward references to previous data. | 199 size_t dist, size_t len) { |
119 // | 200 self->distance = (uint32_t)dist; |
120 // This is a hash map of fixed size (kBucketSize). Starting from the | 201 self->length_and_code = (uint32_t)(len << 5); |
121 // given index, kBucketSweep buckets are used to store values of a key. | 202 } |
122 template <int kBucketBits, int kBucketSweep, bool kUseDictionary> | 203 |
123 class HashLongestMatchQuickly { | 204 static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self, |
124 public: | 205 size_t dist, size_t len, size_t len_code) { |
125 HashLongestMatchQuickly(void) { | 206 self->distance = (uint32_t)dist; |
126 Reset(); | 207 self->length_and_code = |
127 } | 208 (uint32_t)((len << 5) | (len == len_code ? 0 : len_code)); |
128 void Reset(void) { | 209 } |
129 need_init_ = true; | 210 |
130 num_dict_lookups_ = 0; | 211 static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) { |
131 num_dict_matches_ = 0; | 212 return self->length_and_code >> 5; |
132 } | 213 } |
133 void Init(void) { | 214 |
134 if (need_init_) { | 215 static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { |
135 // It is not strictly necessary to fill this buffer here, but | 216 size_t code = self->length_and_code & 31; |
136 // not filling will make the results of the compression stochastic | 217 return code ? code : BackwardMatchLength(self); |
137 // (but correct). This is because random data would cause the | 218 } |
138 // system to find accidentally good backward references here and there. | 219 |
139 memset(&buckets_[0], 0, sizeof(buckets_)); | 220 #define EXPAND_CAT(a, b) CAT(a, b) |
140 need_init_ = false; | 221 #define CAT(a, b) a ## b |
141 } | 222 #define FN(X) EXPAND_CAT(X, HASHER()) |
142 } | 223 |
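The EXPAND_CAT / CAT / FN trio is what lets a single body of hasher code be stamped out once per hasher: while HASHER() is defined as H10, FN(Store) expands to StoreH10; redefine HASHER() as H5 and the same include produces StoreH5. A standalone illustration of the pattern (hypothetical names, standard C preprocessor behavior):

  #define EXPAND_CAT(a, b) CAT(a, b)
  #define CAT(a, b) a ## b
  #define HASHER() H10
  #define FN(X) EXPAND_CAT(X, HASHER())

  static int FN(Demo)(void) { return 0; }  /* expands to: static int DemoH10(void) */

The extra EXPAND_CAT level matters because the operands of ## are not macro-expanded; routing the paste through a second macro forces HASHER() to be replaced first.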
143 void InitForData(const uint8_t* data, size_t num) { | 224 #define MAX_NUM_MATCHES_H10 (64 + MAX_TREE_SEARCH_DEPTH) |
144 for (size_t i = 0; i < num; ++i) { | 225 |
145 const uint32_t key = HashBytes(&data[i]); | 226 #define HASHER() H10 |
146 memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0])); | 227 #define HashToBinaryTree HASHER() |
147 need_init_ = false; | 228 |
148 } | 229 #define BUCKET_BITS 17 |
149 } | 230 #define BUCKET_SIZE (1 << BUCKET_BITS) |
150 // Look at 4 bytes at data. | 231 |
151 // Compute a hash from these, and store the value somewhere within | 232 static size_t FN(HashTypeLength)(void) { return 4; } |
152 // [ix .. ix+3]. | 233 static size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; } |
153 inline void Store(const uint8_t *data, const uint32_t ix) { | 234 |
154 const uint32_t key = HashBytes(data); | 235 static uint32_t FN(HashBytes)(const uint8_t *data) { |
155 // Wiggle the value with the bucket sweep range. | 236 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; |
156 const uint32_t off = (ix >> 3) % kBucketSweep; | 237 /* The higher bits contain more mixture from the multiplication, |
157 buckets_[key + off] = ix; | 238 so we take our results from there. */ |
158 } | 239 return h >> (32 - BUCKET_BITS); |
159 | 240 } |
160 // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask] | 241 |
161 // up to the length of max_length and stores the position cur_ix in the | 242 /* A (forgetful) hash table where each hash bucket contains a binary tree of |
162 // hash table. | 243 sequences whose first 4 bytes share the same hash code. |
163 // | 244 Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting |
164 // Does not look for matches longer than max_length. | 245 position in the input data. The binary tree is sorted by the lexicographic |
165 // Does not look for matches further away than max_backward. | 246 order of the sequences, and it is also a max-heap with respect to the |
166 // Writes the best found match length into best_len_out. | 247 starting positions. */ |
167 // Writes the index (&data[index]) of the start of the best match into | 248 typedef struct HashToBinaryTree { |
168 // best_distance_out. | 249 /* The window size minus 1 */ |
169 inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer, | 250 size_t window_mask_; |
170 const size_t ring_buffer_mask, | 251 |
171 const int* __restrict distance_cache, | 252 /* Hash table that maps the 4-byte hashes of the sequence to the last |
172 const size_t cur_ix, | 253 position where this hash was found, which is the root of the binary |
173 const size_t max_length, | 254 tree of sequences that share this hash bucket. */ |
174 const size_t max_backward, | 255 uint32_t buckets_[BUCKET_SIZE]; |
175 size_t * __restrict best_len_out, | 256 |
176 size_t * __restrict best_len_code_out, | 257 /* The union of the binary trees of each hash bucket. The root of the tree |
177 size_t * __restrict best_distance_out, | 258 corresponding to a hash is a sequence starting at buckets_[hash] and |
178 double* __restrict best_score_out) { | 259 the left and right children of a sequence starting at pos are |
179 const size_t best_len_in = *best_len_out; | 260 forest_[2 * pos] and forest_[2 * pos + 1]. */ |
180 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; | 261 uint32_t* forest_; |
181 const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]); | 262 |
182 int compare_char = ring_buffer[cur_ix_masked + best_len_in]; | 263 /* A position used to mark a non-existent sequence, i.e. a tree is empty if |
183 double best_score = *best_score_out; | 264 its root is at invalid_pos_ and a node is a leaf if both its children |
184 size_t best_len = best_len_in; | 265 are at invalid_pos_. */ |
185 size_t cached_backward = static_cast<size_t>(distance_cache[0]); | 266 uint32_t invalid_pos_; |
186 size_t prev_ix = cur_ix - cached_backward; | 267 |
187 bool match_found = false; | 268 size_t forest_size_; |
188 if (prev_ix < cur_ix) { | 269 BROTLI_BOOL is_dirty_; |
189 prev_ix &= static_cast<uint32_t>(ring_buffer_mask); | 270 } HashToBinaryTree; |
190 if (compare_char == ring_buffer[prev_ix + best_len]) { | 271 |
191 size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix], | 272 static void FN(Reset)(HashToBinaryTree* self) { |
192 &ring_buffer[cur_ix_masked], | 273 self->is_dirty_ = BROTLI_TRUE; |
193 max_length); | 274 } |
194 if (len >= 4) { | 275 |
195 best_score = BackwardReferenceScoreUsingLastDistance(len, 0); | 276 static void FN(Initialize)(HashToBinaryTree* self) { |
196 best_len = len; | 277 self->forest_ = NULL; |
197 *best_len_out = len; | 278 self->forest_size_ = 0; |
198 *best_len_code_out = len; | 279 FN(Reset)(self); |
199 *best_distance_out = cached_backward; | 280 } |
200 *best_score_out = best_score; | 281 |
201 compare_char = ring_buffer[cur_ix_masked + best_len]; | 282 static void FN(Cleanup)(MemoryManager* m, HashToBinaryTree* self) { |
202 if (kBucketSweep == 1) { | 283 BROTLI_FREE(m, self->forest_); |
203 buckets_[key] = static_cast<uint32_t>(cur_ix); | 284 } |
204 return true; | 285 |
205 } else { | 286 static void FN(Init)( |
206 match_found = true; | 287 MemoryManager* m, HashToBinaryTree* self, const uint8_t* data, |
207 } | 288 const BrotliEncoderParams* params, size_t position, size_t bytes, |
| 289 BROTLI_BOOL is_last) { |
| 290 if (self->is_dirty_) { |
| 291 uint32_t invalid_pos; |
| 292 size_t num_nodes; |
| 293 uint32_t i; |
| 294 BROTLI_UNUSED(data); |
| 295 self->window_mask_ = (1u << params->lgwin) - 1u; |
| 296 invalid_pos = (uint32_t)(0 - self->window_mask_); |
| 297 self->invalid_pos_ = invalid_pos; |
| 298 for (i = 0; i < BUCKET_SIZE; i++) { |
| 299 self->buckets_[i] = invalid_pos; |
| 300 } |
| 301 num_nodes = (position == 0 && is_last) ? bytes : self->window_mask_ + 1; |
| 302 if (num_nodes > self->forest_size_) { |
| 303 BROTLI_FREE(m, self->forest_); |
| 304 self->forest_ = BROTLI_ALLOC(m, uint32_t, 2 * num_nodes); |
| 305 if (BROTLI_IS_OOM(m)) return; |
| 306 self->forest_size_ = num_nodes; |
| 307 } |
| 308 self->is_dirty_ = BROTLI_FALSE; |
| 309 } |
| 310 } |
| 311 |
| 312 static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self, |
| 313 const size_t pos) { |
| 314 return 2 * (pos & self->window_mask_); |
| 315 } |
| 316 |
| 317 static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self, |
| 318 const size_t pos) { |
| 319 return 2 * (pos & self->window_mask_) + 1; |
| 320 } |
| 321 |
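Per the struct comments above, every window position owns the two forest_ slots returned by FN(LeftChildIndex) and FN(RightChildIndex), and invalid_pos_ marks a missing child or an empty bucket. A small sketch of those conventions (hypothetical helpers, not part of the header):

  /* A hash bucket's tree is empty when its root is the invalid position. */
  static BROTLI_BOOL BucketIsEmptySketch(HashToBinaryTree* self, uint32_t key) {
    return TO_BROTLI_BOOL(self->buckets_[key] == self->invalid_pos_);
  }

  /* A stored sequence is a leaf when both of its child slots are invalid. */
  static BROTLI_BOOL IsLeafSketch(HashToBinaryTree* self, size_t pos) {
    return TO_BROTLI_BOOL(
        self->forest_[FN(LeftChildIndex)(self, pos)] == self->invalid_pos_ &&
        self->forest_[FN(RightChildIndex)(self, pos)] == self->invalid_pos_);
  }

FN(StoreAndFindMatches) below maintains these invariants when it re-roots a bucket's tree at the current position.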
| 322 /* Stores the hash of the next 4 bytes and in a single tree-traversal, the |
| 323 hash bucket's binary tree is searched for matches and is re-rooted at the |
| 324 current position. |
| 325 |
| 326 If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the |
| 327 current position is searched for matches, but the state of the hash table |
| 328 is not changed, since we can not know the final sorting order of the |
| 329 current (incomplete) sequence. |
| 330 |
| 331 This function must be called with increasing cur_ix positions. */ |
| 332 static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)( |
| 333 HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data, |
| 334 const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length, |
| 335 const size_t max_backward, size_t* const BROTLI_RESTRICT best_len, |
| 336 BackwardMatch* BROTLI_RESTRICT matches) { |
| 337 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; |
| 338 const size_t max_comp_len = |
| 339 BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH); |
| 340 const BROTLI_BOOL should_reroot_tree = |
| 341 TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH); |
| 342 const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]); |
| 343 size_t prev_ix = self->buckets_[key]; |
| 344 /* The forest index of the rightmost node of the left subtree of the new |
| 345 root, updated as we traverse and re-root the tree of the hash bucket. */ |
| 346 size_t node_left = FN(LeftChildIndex)(self, cur_ix); |
| 347 /* The forest index of the leftmost node of the right subtree of the new |
| 348 root, updated as we traverse and re-root the tree of the hash bucket. */ |
| 349 size_t node_right = FN(RightChildIndex)(self, cur_ix); |
| 350 /* The match length of the rightmost node of the left subtree of the new |
| 351 root, updated as we traverse and re-root the tree of the hash bucket. */ |
| 352 size_t best_len_left = 0; |
| 353 /* The match length of the leftmost node of the right subtree of the new |
| 354 root, updated as we traverse and re-root the tree of the hash bucket. */ |
| 355 size_t best_len_right = 0; |
| 356 size_t depth_remaining; |
| 357 if (should_reroot_tree) { |
| 358 self->buckets_[key] = (uint32_t)cur_ix; |
| 359 } |
| 360 for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) { |
| 361 const size_t backward = cur_ix - prev_ix; |
| 362 const size_t prev_ix_masked = prev_ix & ring_buffer_mask; |
| 363 if (backward == 0 || backward > max_backward || depth_remaining == 0) { |
| 364 if (should_reroot_tree) { |
| 365 self->forest_[node_left] = self->invalid_pos_; |
| 366 self->forest_[node_right] = self->invalid_pos_; |
| 367 } |
| 368 break; |
| 369 } |
| 370 { |
| 371 const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right); |
| 372 size_t len; |
| 373 assert(cur_len <= MAX_TREE_COMP_LENGTH); |
| 374 len = cur_len + |
| 375 FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len], |
| 376 &data[prev_ix_masked + cur_len], |
| 377 max_length - cur_len); |
| 378 assert(0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len)); |
| 379 if (matches && len > *best_len) { |
| 380 *best_len = len; |
| 381 InitBackwardMatch(matches++, backward, len); |
| 382 } |
| 383 if (len >= max_comp_len) { |
| 384 if (should_reroot_tree) { |
| 385 self->forest_[node_left] = |
| 386 self->forest_[FN(LeftChildIndex)(self, prev_ix)]; |
| 387 self->forest_[node_right] = |
| 388 self->forest_[FN(RightChildIndex)(self, prev_ix)]; |
208 } | 389 } |
209 } | |
210 } | |
211 if (kBucketSweep == 1) { | |
212 // Only one to look for, don't bother to prepare for a loop. | |
213 prev_ix = buckets_[key]; | |
214 buckets_[key] = static_cast<uint32_t>(cur_ix); | |
215 size_t backward = cur_ix - prev_ix; | |
216 prev_ix &= static_cast<uint32_t>(ring_buffer_mask); | |
217 if (compare_char != ring_buffer[prev_ix + best_len_in]) { | |
218 return false; | |
219 } | |
220 if (PREDICT_FALSE(backward == 0 || backward > max_backward)) { | |
221 return false; | |
222 } | |
223 const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix], | |
224 &ring_buffer[cur_ix_masked], | |
225 max_length); | |
226 if (len >= 4) { | |
227 *best_len_out = len; | |
228 *best_len_code_out = len; | |
229 *best_distance_out = backward; | |
230 *best_score_out = BackwardReferenceScore(len, backward); | |
231 return true; | |
232 } | |
233 } else { | |
234 uint32_t *bucket = buckets_ + key; | |
235 prev_ix = *bucket++; | |
236 for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) { | |
237 const size_t backward = cur_ix - prev_ix; | |
238 prev_ix &= static_cast<uint32_t>(ring_buffer_mask); | |
239 if (compare_char != ring_buffer[prev_ix + best_len]) { | |
240 continue; | |
241 } | |
242 if (PREDICT_FALSE(backward == 0 || backward > max_backward)) { | |
243 continue; | |
244 } | |
245 const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix], | |
246 &ring_buffer[cur_ix_masked], | |
247 max_length); | |
248 if (len >= 4) { | |
249 const double score = BackwardReferenceScore(len, backward); | |
250 if (best_score < score) { | |
251 best_score = score; | |
252 best_len = len; | |
253 *best_len_out = best_len; | |
254 *best_len_code_out = best_len; | |
255 *best_distance_out = backward; | |
256 *best_score_out = score; | |
257 compare_char = ring_buffer[cur_ix_masked + best_len]; | |
258 match_found = true; | |
259 } | |
260 } | |
261 } | |
262 } | |
263 if (kUseDictionary && !match_found && | |
264 num_dict_matches_ >= (num_dict_lookups_ >> 7)) { | |
265 ++num_dict_lookups_; | |
266 const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1; | |
267 const uint16_t v = kStaticDictionaryHash[dict_key]; | |
268 if (v > 0) { | |
269 const uint32_t len = v & 31; | |
270 const uint32_t dist = v >> 5; | |
271 const size_t offset = | |
272 kBrotliDictionaryOffsetsByLength[len] + len * dist; | |
273 if (len <= max_length) { | |
274 const size_t matchlen = | |
275 FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked], | |
276 &kBrotliDictionary[offset], len); | |
277 if (matchlen + kCutoffTransformsCount > len && matchlen > 0) { | |
278 const size_t transform_id = kCutoffTransforms[len - matchlen]; | |
279 const size_t word_id = | |
280 transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) + | |
281 dist; | |
282 const size_t backward = max_backward + word_id + 1; | |
283 const double score = BackwardReferenceScore(matchlen, backward); | |
284 if (best_score < score) { | |
285 ++num_dict_matches_; | |
286 best_score = score; | |
287 best_len = matchlen; | |
288 *best_len_out = best_len; | |
289 *best_len_code_out = len; | |
290 *best_distance_out = backward; | |
291 *best_score_out = best_score; | |
292 match_found = true; | |
293 } | |
294 } | |
295 } | |
296 } | |
297 } | |
298 const uint32_t off = (cur_ix >> 3) % kBucketSweep; | |
299 buckets_[key + off] = static_cast<uint32_t>(cur_ix); | |
300 return match_found; | |
301 } | |
302 | |
303 enum { kHashLength = 5 }; | |
304 enum { kHashTypeLength = 8 }; | |
305 // HashBytes is the function that chooses the bucket to place | |
306 // the address in. The HashLongestMatch and HashLongestMatchQuickly | |
307 // classes have separate, different implementations of hashing. | |
308 static uint32_t HashBytes(const uint8_t *data) { | |
309 // Computing a hash based on 5 bytes works much better for | |
310 // qualities 1 and 3, where the next hash value is likely to replace | |
311 uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32; | |
312 // The higher bits contain more mixture from the multiplication, | |
313 // so we take our results from there. | |
314 return static_cast<uint32_t>(h >> (64 - kBucketBits)); | |
315 } | |
316 | |
317 enum { kHashMapSize = 4 << kBucketBits }; | |
318 | |
319 private: | |
320 static const uint32_t kBucketSize = 1 << kBucketBits; | |
321 uint32_t buckets_[kBucketSize + kBucketSweep]; | |
322 // True if buckets_ array needs to be initialized. | |
323 bool need_init_; | |
324 size_t num_dict_lookups_; | |
325 size_t num_dict_matches_; | |
326 }; | |
327 | |
328 // A (forgetful) hash table to the data seen by the compressor, to | |
329 // help create backward references to previous data. | |
330 // | |
331 // This is a hash map of fixed size (kBucketSize) to a ring buffer of | |
332 // fixed size (kBlockSize). The ring buffer contains the last kBlockSize | |
333 // index positions of the given hash key in the compressed data. | |
334 template <int kBucketBits, | |
335 int kBlockBits, | |
336 int kNumLastDistancesToCheck> | |
337 class HashLongestMatch { | |
338 public: | |
339 HashLongestMatch(void) { | |
340 Reset(); | |
341 } | |
342 | |
343 void Reset(void) { | |
344 need_init_ = true; | |
345 num_dict_lookups_ = 0; | |
346 num_dict_matches_ = 0; | |
347 } | |
348 | |
349 void Init(void) { | |
350 if (need_init_) { | |
351 memset(&num_[0], 0, sizeof(num_)); | |
352 need_init_ = false; | |
353 } | |
354 } | |
355 | |
356 void InitForData(const uint8_t* data, size_t num) { | |
357 for (size_t i = 0; i < num; ++i) { | |
358 const uint32_t key = HashBytes(&data[i]); | |
359 num_[key] = 0; | |
360 need_init_ = false; | |
361 } | |
362 } | |
363 | |
364 // Look at 3 bytes at data. | |
365 // Compute a hash from these, and store the value of ix at that position. | |
366 inline void Store(const uint8_t *data, const uint32_t ix) { | |
367 const uint32_t key = HashBytes(data); | |
368 const int minor_ix = num_[key] & kBlockMask; | |
369 buckets_[key][minor_ix] = ix; | |
370 ++num_[key]; | |
371 } | |
372 | |
373 // Find a longest backward match of &data[cur_ix] up to the length of | |
374 // max_length and stores the position cur_ix in the hash table. | |
375 // | |
376 // Does not look for matches longer than max_length. | |
377 // Does not look for matches further away than max_backward. | |
378 // Writes the best found match length into best_len_out. | |
379 // Writes the index (&data[index]) offset from the start of the best match | |
380 // into best_distance_out. | |
381 // Write the score of the best match into best_score_out. | |
382 bool FindLongestMatch(const uint8_t * __restrict data, | |
383 const size_t ring_buffer_mask, | |
384 const int* __restrict distance_cache, | |
385 const size_t cur_ix, | |
386 const size_t max_length, | |
387 const size_t max_backward, | |
388 size_t * __restrict best_len_out, | |
389 size_t * __restrict best_len_code_out, | |
390 size_t * __restrict best_distance_out, | |
391 double * __restrict best_score_out) { | |
392 *best_len_code_out = 0; | |
393 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; | |
394 bool match_found = false; | |
395 // Don't accept a short copy from far away. | |
396 double best_score = *best_score_out; | |
397 size_t best_len = *best_len_out; | |
398 *best_len_out = 0; | |
399 // Try last distance first. | |
400 for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) { | |
401 const size_t idx = kDistanceCacheIndex[i]; | |
402 const size_t backward = | |
403 static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]); | |
404 size_t prev_ix = static_cast<size_t>(cur_ix - backward); | |
405 if (prev_ix >= cur_ix) { | |
406 continue; | |
407 } | |
408 if (PREDICT_FALSE(backward > max_backward)) { | |
409 continue; | |
410 } | |
411 prev_ix &= ring_buffer_mask; | |
412 | |
413 if (cur_ix_masked + best_len > ring_buffer_mask || | |
414 prev_ix + best_len > ring_buffer_mask || | |
415 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { | |
416 continue; | |
417 } | |
418 const size_t len = FindMatchLengthWithLimit(&data[prev_ix], | |
419 &data[cur_ix_masked], | |
420 max_length); | |
421 if (len >= 3 || (len == 2 && i < 2)) { | |
422 // Comparing for >= 2 does not change the semantics, but just saves for | |
423 // a few unnecessary binary logarithms in backward reference score, | |
424 // since we are not interested in such short matches. | |
425 double score = BackwardReferenceScoreUsingLastDistance(len, i); | |
426 if (best_score < score) { | |
427 best_score = score; | |
428 best_len = len; | |
429 *best_len_out = best_len; | |
430 *best_len_code_out = best_len; | |
431 *best_distance_out = backward; | |
432 *best_score_out = best_score; | |
433 match_found = true; | |
434 } | |
435 } | |
436 } | |
437 const uint32_t key = HashBytes(&data[cur_ix_masked]); | |
438 const uint32_t * __restrict const bucket = &buckets_[key][0]; | |
439 const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0; | |
440 for (size_t i = num_[key]; i > down;) { | |
441 --i; | |
442 size_t prev_ix = bucket[i & kBlockMask]; | |
443 const size_t backward = cur_ix - prev_ix; | |
444 if (PREDICT_FALSE(backward == 0 || backward > max_backward)) { | |
445 break; | 390 break; |
446 } | 391 } |
447 prev_ix &= ring_buffer_mask; | 392 if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) { |
448 if (cur_ix_masked + best_len > ring_buffer_mask || | 393 best_len_left = len; |
449 prev_ix + best_len > ring_buffer_mask || | 394 if (should_reroot_tree) { |
450 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { | 395 self->forest_[node_left] = (uint32_t)prev_ix; |
451 continue; | 396 } |
| 397 node_left = FN(RightChildIndex)(self, prev_ix); |
| 398 prev_ix = self->forest_[node_left]; |
| 399 } else { |
| 400 best_len_right = len; |
| 401 if (should_reroot_tree) { |
| 402 self->forest_[node_right] = (uint32_t)prev_ix; |
| 403 } |
| 404 node_right = FN(LeftChildIndex)(self, prev_ix); |
| 405 prev_ix = self->forest_[node_right]; |
452 } | 406 } |
453 const size_t len = FindMatchLengthWithLimit(&data[prev_ix], | 407 } |
454 &data[cur_ix_masked], | 408 } |
455 max_length); | 409 return matches; |
456 if (len >= 4) { | 410 } |
457 // Comparing for >= 3 does not change the semantics, but just saves | 411 |
458 // for a few unnecessary binary logarithms in backward reference | 412 /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the |
459 // score, since we are not interested in such short matches. | 413 length of max_length and stores the position cur_ix in the hash table. |
460 double score = BackwardReferenceScore(len, backward); | 414 |
461 if (best_score < score) { | 415 Sets *num_matches to the number of matches found, and stores the found |
462 best_score = score; | 416 matches in matches[0] to matches[*num_matches - 1]. The matches will be |
463 best_len = len; | 417 sorted by strictly increasing length and (non-strictly) increasing |
464 *best_len_out = best_len; | 418 distance. */ |
465 *best_len_code_out = best_len; | 419 static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self, |
466 *best_distance_out = backward; | 420 const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix, |
467 *best_score_out = best_score; | 421 const size_t max_length, const size_t max_backward, |
468 match_found = true; | 422 const BrotliEncoderParams* params, BackwardMatch* matches) { |
469 } | 423 BackwardMatch* const orig_matches = matches; |
470 } | 424 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; |
471 } | 425 size_t best_len = 1; |
472 buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix); | 426 const size_t short_match_max_backward = |
473 ++num_[key]; | 427 params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64; |
474 if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) { | 428 size_t stop = cur_ix - short_match_max_backward; |
475 size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1; | 429 uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1]; |
476 for (int k = 0; k < 2; ++k, ++dict_key) { | 430 size_t i; |
477 ++num_dict_lookups_; | 431 if (cur_ix < short_match_max_backward) { stop = 0; } |
478 const uint16_t v = kStaticDictionaryHash[dict_key]; | 432 for (i = cur_ix - 1; i > stop && best_len <= 2; --i) { |
479 if (v > 0) { | 433 size_t prev_ix = i; |
480 const size_t len = v & 31; | 434 const size_t backward = cur_ix - prev_ix; |
481 const size_t dist = v >> 5; | 435 if (BROTLI_PREDICT_FALSE(backward > max_backward)) { |
482 const size_t offset = | 436 break; |
483 kBrotliDictionaryOffsetsByLength[len] + len * dist; | 437 } |
484 if (len <= max_length) { | 438 prev_ix &= ring_buffer_mask; |
485 const size_t matchlen = | 439 if (data[cur_ix_masked] != data[prev_ix] || |
486 FindMatchLengthWithLimit(&data[cur_ix_masked], | 440 data[cur_ix_masked + 1] != data[prev_ix + 1]) { |
487 &kBrotliDictionary[offset], len); | 441 continue; |
488 if (matchlen + kCutoffTransformsCount > len && matchlen > 0) { | 442 } |
489 const size_t transform_id = kCutoffTransforms[len - matchlen]; | 443 { |
490 const size_t word_id = | |
491 transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) + | |
492 dist; | |
493 const size_t backward = max_backward + word_id + 1; | |
494 double score = BackwardReferenceScore(matchlen, backward); | |
495 if (best_score < score) { | |
496 ++num_dict_matches_; | |
497 best_score = score; | |
498 best_len = matchlen; | |
499 *best_len_out = best_len; | |
500 *best_len_code_out = len; | |
501 *best_distance_out = backward; | |
502 *best_score_out = best_score; | |
503 match_found = true; | |
504 } | |
505 } | |
506 } | |
507 } | |
508 } | |
509 } | |
510 return match_found; | |
511 } | |
512 | |
513 // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the | |
514 // length of max_length and stores the position cur_ix in the hash table. | |
515 // | |
516 // Sets *num_matches to the number of matches found, and stores the found | |
517 // matches in matches[0] to matches[*num_matches - 1]. The matches will be | |
518 // sorted by strictly increasing length and (non-strictly) increasing | |
519 // distance. | |
520 size_t FindAllMatches(const uint8_t* data, | |
521 const size_t ring_buffer_mask, | |
522 const size_t cur_ix, | |
523 const size_t max_length, | |
524 const size_t max_backward, | |
525 BackwardMatch* matches) { | |
526 BackwardMatch* const orig_matches = matches; | |
527 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; | |
528 size_t best_len = 1; | |
529 size_t stop = cur_ix - 64; | |
530 if (cur_ix < 64) { stop = 0; } | |
531 for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) { | |
532 size_t prev_ix = i; | |
533 const size_t backward = cur_ix - prev_ix; | |
534 if (PREDICT_FALSE(backward > max_backward)) { | |
535 break; | |
536 } | |
537 prev_ix &= ring_buffer_mask; | |
538 if (data[cur_ix_masked] != data[prev_ix] || | |
539 data[cur_ix_masked + 1] != data[prev_ix + 1]) { | |
540 continue; | |
541 } | |
542 const size_t len = | 444 const size_t len = |
543 FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked], | 445 FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked], |
544 max_length); | 446 max_length); |
545 if (len > best_len) { | 447 if (len > best_len) { |
546 best_len = len; | 448 best_len = len; |
547 *matches++ = BackwardMatch(backward, len); | 449 InitBackwardMatch(matches++, backward, len); |
548 } | 450 } |
549 } | 451 } |
550 const uint32_t key = HashBytes(&data[cur_ix_masked]); | 452 } |
551 const uint32_t * __restrict const bucket = &buckets_[key][0]; | 453 if (best_len < max_length) { |
552 const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0; | 454 matches = FN(StoreAndFindMatches)(self, data, cur_ix, ring_buffer_mask, |
553 for (size_t i = num_[key]; i > down;) { | 455 max_length, max_backward, &best_len, matches); |
554 --i; | 456 } |
555 size_t prev_ix = bucket[i & kBlockMask]; | 457 for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) { |
556 const size_t backward = cur_ix - prev_ix; | 458 dict_matches[i] = kInvalidMatch; |
557 if (PREDICT_FALSE(backward == 0 || backward > max_backward)) { | 459 } |
558 break; | 460 { |
559 } | 461 size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1); |
560 prev_ix &= ring_buffer_mask; | 462 if (BrotliFindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, |
561 if (cur_ix_masked + best_len > ring_buffer_mask || | 463 max_length, &dict_matches[0])) { |
562 prev_ix + best_len > ring_buffer_mask || | 464 size_t maxlen = BROTLI_MIN( |
563 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { | 465 size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length); |
564 continue; | 466 size_t l; |
565 } | 467 for (l = minlen; l <= maxlen; ++l) { |
566 const size_t len = | |
567 FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked], | |
568 max_length); | |
569 if (len > best_len) { | |
570 best_len = len; | |
571 *matches++ = BackwardMatch(backward, len); | |
572 } | |
573 } | |
574 buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix); | |
575 ++num_[key]; | |
576 uint32_t dict_matches[kMaxDictionaryMatchLen + 1]; | |
577 for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) { | |
578 dict_matches[i] = kInvalidMatch; | |
579 } | |
580 size_t minlen = std::max<size_t>(4, best_len + 1); | |
581 if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length, | |
582 &dict_matches[0])) { | |
583 size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length); | |
584 for (size_t l = minlen; l <= maxlen; ++l) { | |
585 uint32_t dict_id = dict_matches[l]; | 468 uint32_t dict_id = dict_matches[l]; |
586 if (dict_id < kInvalidMatch) { | 469 if (dict_id < kInvalidMatch) { |
587 *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l, | 470 InitDictionaryBackwardMatch(matches++, |
588 dict_id & 31); | 471 max_backward + (dict_id >> 5) + 1, l, dict_id & 31); |
589 } | 472 } |
590 } | 473 } |
591 } | 474 } |
592 return static_cast<size_t>(matches - orig_matches); | 475 } |
593 } | 476 return (size_t)(matches - orig_matches); |
594 | 477 } |
595 enum { kHashLength = 4 }; | 478 |
596 enum { kHashTypeLength = 4 }; | 479 /* Stores the hash of the next 4 bytes and re-roots the binary tree at the |
597 | 480 current sequence, without returning any matches. |
598 // HashBytes is the function that chooses the bucket to place | 481 REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */ |
599 // the address in. The HashLongestMatch and HashLongestMatchQuickly | 482 static BROTLI_INLINE void FN(Store)(HashToBinaryTree* self, const uint8_t *data, |
600 // classes have separate, different implementations of hashing. | 483 const size_t mask, const size_t ix) { |
601 static uint32_t HashBytes(const uint8_t *data) { | 484 /* Maximum distance is window size - 16, see section 9.1. of the spec. */ |
602 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; | 485 const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1; |
603 // The higher bits contain more mixture from the multiplication, | 486 FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH, |
604 // so we take our results from there. | 487 max_backward, NULL, NULL); |
605 return h >> (32 - kBucketBits); | 488 } |
606 } | 489 |
607 | 490 static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self, |
608 enum { kHashMapSize = 2 << kBucketBits }; | 491 const uint8_t *data, const size_t mask, const size_t ix_start, |
609 | 492 const size_t ix_end) { |
610 static const size_t kMaxNumMatches = 64 + (1 << kBlockBits); | 493 size_t i = ix_start; |
611 | 494 size_t j = ix_start; |
612 private: | 495 if (ix_start + 63 <= ix_end) { |
613 // Number of hash buckets. | 496 i = ix_end - 63; |
614 static const uint32_t kBucketSize = 1 << kBucketBits; | 497 } |
615 | 498 if (ix_start + 512 <= i) { |
616 // Only kBlockSize newest backward references are kept, | 499 for (; j < i; j += 8) { |
617 // and the older are forgotten. | 500 FN(Store)(self, data, mask, j); |
618 static const uint32_t kBlockSize = 1 << kBlockBits; | 501 } |
619 | 502 } |
620 // Mask for accessing entries in a block (in a ringbuffer manner). | 503 for (; i < ix_end; ++i) { |
621 static const uint32_t kBlockMask = (1 << kBlockBits) - 1; | 504 FN(Store)(self, data, mask, i); |
622 | 505 } |
623 // Number of entries in a particular bucket. | 506 } |
624 uint16_t num_[kBucketSize]; | 507 |
625 | 508 static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashToBinaryTree* self, |
626 // Buckets containing kBlockSize of backward references. | 509 size_t num_bytes, size_t position, const uint8_t* ringbuffer, |
627 uint32_t buckets_[kBucketSize][kBlockSize]; | 510 size_t ringbuffer_mask) { |
628 | 511 if (num_bytes >= FN(HashTypeLength)() - 1 && |
629 // True if num_ array needs to be initialized. | 512 position >= MAX_TREE_COMP_LENGTH) { |
630 bool need_init_; | 513 /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher. |
631 | 514 These could not be calculated before, since they require knowledge |
632 size_t num_dict_lookups_; | 515 of both the previous and the current block. */ |
633 size_t num_dict_matches_; | 516 const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1; |
634 }; | 517 const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes); |
635 | 518 size_t i; |
636 // A (forgetful) hash table where each hash bucket contains a binary tree of | 519 for (i = i_start; i < i_end; ++i) { |
637 // sequences whose first 4 bytes share the same hash code. | 520 /* Maximum distance is window size - 16, see section 9.1. of the spec. |
638 // Each sequence is kMaxTreeCompLength long and is identified by its starting | 521 Furthermore, we have to make sure that we don't look further back |
639 // position in the input data. The binary tree is sorted by the lexicographic | 522 from the start of the next block than the window size, otherwise we |
640 // order of the sequences, and it is also a max-heap with respect to the | 523 could access already overwritten areas of the ring-buffer. */ |
641 // starting positions. | 524 const size_t max_backward = |
642 class HashToBinaryTree { | 525 self->window_mask_ - BROTLI_MAX(size_t, |
643 public: | 526 BROTLI_WINDOW_GAP - 1, |
644 HashToBinaryTree() : forest_(NULL) { | 527 position - i); |
645 Reset(); | 528 /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the |
646 } | 529 end of the current block and that we have at least |
647 | 530 MAX_TREE_COMP_LENGTH tail in the ring-buffer. */ |
648 ~HashToBinaryTree() { | 531 FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask, |
649 delete[] forest_; | 532 MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL); |
650 } | 533 } |
651 | 534 } |
652 void Reset() { | 535 } |
653 need_init_ = true; | 536 |
654 } | 537 #undef BUCKET_SIZE |
655 | 538 #undef BUCKET_BITS |
656 void Init(int lgwin, size_t position, size_t bytes, bool is_last) { | 539 |
657 if (need_init_) { | 540 #undef HASHER |
658 window_mask_ = (1u << lgwin) - 1u; | 541 |
659 invalid_pos_ = static_cast<uint32_t>(0 - window_mask_); | 542 /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression |
660 for (uint32_t i = 0; i < kBucketSize; i++) { | 543 a little faster (0.5% - 1%) and it compresses 0.15% better on small text |
661 buckets_[i] = invalid_pos_; | 544 and HTML inputs. */ |
662 } | 545 |
663 size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1; | 546 #define HASHER() H2 |
664 forest_ = new uint32_t[2 * num_nodes]; | 547 #define BUCKET_BITS 16 |
665 need_init_ = false; | 548 #define BUCKET_SWEEP 1 |
666 } | 549 #define USE_DICTIONARY 1 |
667 } | 550 #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ |
668 | 551 #undef BUCKET_SWEEP |
669 // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the | 552 #undef USE_DICTIONARY |
670 // length of max_length and stores the position cur_ix in the hash table. | 553 #undef HASHER |
671 // | 554 |
672 // Sets *num_matches to the number of matches found, and stores the found | 555 #define HASHER() H3 |
673 // matches in matches[0] to matches[*num_matches - 1]. The matches will be | 556 #define BUCKET_SWEEP 2 |
674 // sorted by strictly increasing length and (non-strictly) increasing | 557 #define USE_DICTIONARY 0 |
675 // distance. | 558 #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ |
676 size_t FindAllMatches(const uint8_t* data, | 559 #undef USE_DICTIONARY |
677 const size_t ring_buffer_mask, | 560 #undef BUCKET_SWEEP |
678 const size_t cur_ix, | 561 #undef BUCKET_BITS |
679 const size_t max_length, | 562 #undef HASHER |
680 const size_t max_backward, | 563 |
681 BackwardMatch* matches) { | 564 #define HASHER() H4 |
682 BackwardMatch* const orig_matches = matches; | 565 #define BUCKET_BITS 17 |
683 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; | 566 #define BUCKET_SWEEP 4 |
684 size_t best_len = 1; | 567 #define USE_DICTIONARY 1 |
685 size_t stop = cur_ix - 64; | 568 #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ |
686 if (cur_ix < 64) { stop = 0; } | 569 #undef USE_DICTIONARY |
687 for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) { | 570 #undef BUCKET_SWEEP |
688 size_t prev_ix = i; | 571 #undef BUCKET_BITS |
689 const size_t backward = cur_ix - prev_ix; | 572 #undef HASHER |
690 if (PREDICT_FALSE(backward > max_backward)) { | 573 |
691 break; | 574 #define HASHER() H5 |
692 } | 575 #define BUCKET_BITS 14 |
693 prev_ix &= ring_buffer_mask; | 576 #define BLOCK_BITS 4 |
694 if (data[cur_ix_masked] != data[prev_ix] || | 577 #define NUM_LAST_DISTANCES_TO_CHECK 4 |
695 data[cur_ix_masked + 1] != data[prev_ix + 1]) { | 578 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
696 continue; | 579 #undef BLOCK_BITS |
697 } | 580 #undef HASHER |
698 const size_t len = | 581 |
699 FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked], | 582 #define HASHER() H6 |
700 max_length); | 583 #define BLOCK_BITS 5 |
701 if (len > best_len) { | 584 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
702 best_len = len; | 585 #undef NUM_LAST_DISTANCES_TO_CHECK |
703 *matches++ = BackwardMatch(backward, len); | 586 #undef BLOCK_BITS |
704 } | 587 #undef BUCKET_BITS |
705 } | 588 #undef HASHER |
706 if (best_len < max_length) { | 589 |
707 matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask, | 590 #define HASHER() H7 |
708 max_length, &best_len, matches); | 591 #define BUCKET_BITS 15 |
709 } | 592 #define BLOCK_BITS 6 |
710 uint32_t dict_matches[kMaxDictionaryMatchLen + 1]; | 593 #define NUM_LAST_DISTANCES_TO_CHECK 10 |
711 for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) { | 594 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
712 dict_matches[i] = kInvalidMatch; | 595 #undef BLOCK_BITS |
713 } | 596 #undef HASHER |
714 size_t minlen = std::max<size_t>(4, best_len + 1); | 597 |
715 if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length, | 598 #define HASHER() H8 |
716 &dict_matches[0])) { | 599 #define BLOCK_BITS 7 |
717 size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length); | 600 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
718 for (size_t l = minlen; l <= maxlen; ++l) { | 601 #undef NUM_LAST_DISTANCES_TO_CHECK |
719 uint32_t dict_id = dict_matches[l]; | 602 #undef BLOCK_BITS |
720 if (dict_id < kInvalidMatch) { | 603 #undef HASHER |
721 *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l, | 604 |
722 dict_id & 31); | 605 #define HASHER() H9 |
723 } | 606 #define BLOCK_BITS 8 |
724 } | 607 #define NUM_LAST_DISTANCES_TO_CHECK 16 |
725 } | 608 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
726 return static_cast<size_t>(matches - orig_matches); | 609 #undef NUM_LAST_DISTANCES_TO_CHECK |
727 } | 610 #undef BLOCK_BITS |
728 | 611 #undef BUCKET_BITS |
729 // Stores the hash of the next 4 bytes and re-roots the binary tree at the | 612 #undef HASHER |
730 // current sequence, without returning any matches. | 613 |
731 // REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block | 614 #define BUCKET_BITS 15 |
732 void Store(const uint8_t* data, | 615 |
733 const size_t ring_buffer_mask, | 616 #define NUM_LAST_DISTANCES_TO_CHECK 4 |
734 const size_t cur_ix) { | 617 #define NUM_BANKS 1 |
735 size_t best_len = 0; | 618 #define BANK_BITS 16 |
736 StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength, | 619 #define HASHER() H40 |
737 &best_len, NULL); | 620 #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ |
738 } | 621 #undef HASHER |
739 | 622 #undef NUM_LAST_DISTANCES_TO_CHECK |
740 void StitchToPreviousBlock(size_t num_bytes, | 623 |
741 size_t position, | 624 #define NUM_LAST_DISTANCES_TO_CHECK 10 |
742 const uint8_t* ringbuffer, | 625 #define HASHER() H41 |
743 size_t ringbuffer_mask) { | 626 #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ |
744 if (num_bytes >= 3 && position >= kMaxTreeCompLength) { | 627 #undef HASHER |
745 // Store the last `kMaxTreeCompLength - 1` positions in the hasher. | 628 #undef NUM_LAST_DISTANCES_TO_CHECK |
746 // These could not be calculated before, since they require knowledge | 629 #undef NUM_BANKS |
747 // of both the previous and the current block. | 630 #undef BANK_BITS |
748 const size_t i_start = position - kMaxTreeCompLength + 1; | 631 |
749 const size_t i_end = std::min(position, i_start + num_bytes); | 632 #define NUM_LAST_DISTANCES_TO_CHECK 16 |
750 for (size_t i = i_start; i < i_end; ++i) { | 633 #define NUM_BANKS 512 |
751 // We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the | 634 #define BANK_BITS 9 |
752 // end of the current block and that we have at least | 635 #define HASHER() H42 |
753 // kMaxTreeCompLength tail in the ringbuffer. | 636 #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ |
754 Store(ringbuffer, ringbuffer_mask, i); | 637 #undef HASHER |
755 } | 638 #undef NUM_LAST_DISTANCES_TO_CHECK |
756 } | 639 #undef NUM_BANKS |
757 } | 640 #undef BANK_BITS |
758 | 641 |
759 static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth; | 642 #undef BUCKET_BITS |
760 | 643 |
761 private: | 644 #undef FN |
762 // Stores the hash of the next 4 bytes and in a single tree-traversal, the | 645 #undef CAT |
763 // hash bucket's binary tree is searched for matches and is re-rooted at the | 646 #undef EXPAND_CAT |
764 // current position. | 647 |
765 // | 648 #define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(7) H(8) H(9) \ |
766 // If less than kMaxTreeCompLength data is available, the hash bucket of the | 649 H(40) H(41) H(42) |
767 // current position is searched for matches, but the state of the hash table | 650 #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10) |
768 // is not changed, since we can not know the final sorting order of the | 651 |
769 // current (incomplete) sequence. | 652 typedef struct Hashers { |
770 // | 653 #define MEMBER_(N) H ## N* h ## N; |
771 // This function must be called with increasing cur_ix positions. | 654 FOR_ALL_HASHERS(MEMBER_) |
772 BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data, | 655 #undef MEMBER_ |
773 const size_t cur_ix, | 656 } Hashers; |
774 const size_t ring_buffer_mask, | 657 |
775 const size_t max_length, | 658 static BROTLI_INLINE void InitHashers(Hashers* self) { |
776 size_t* const __restrict best_len, | 659 #define INIT_(N) self->h ## N = 0; |
777 BackwardMatch* __restrict matches) { | 660 FOR_ALL_HASHERS(INIT_) |
778 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; | 661 #undef INIT_ |
779 const size_t max_backward = window_mask_ - 15; | 662 } |
780 const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength); | 663 |
781 const bool reroot_tree = max_length >= kMaxTreeCompLength; | 664 static BROTLI_INLINE void DestroyHashers(MemoryManager* m, Hashers* self) { |
782 const uint32_t key = HashBytes(&data[cur_ix_masked]); | 665 if (self->h10) CleanupH10(m, self->h10); |
783 size_t prev_ix = buckets_[key]; | 666 #define CLEANUP_(N) BROTLI_FREE(m, self->h ## N) |
784 // The forest index of the rightmost node of the left subtree of the new | 667 FOR_ALL_HASHERS(CLEANUP_) |
785 // root, updated as we traverse and reroot the tree of the hash bucket. | 668 #undef CLEANUP_ |
786 size_t node_left = LeftChildIndex(cur_ix); | 669 } |
787 // The forest index of the leftmost node of the right subtree of the new | 670 |
788 // root, updated as we traverse and reroot the tree of the hash bucket. | 671 static BROTLI_INLINE void HashersReset(Hashers* self, int type) { |
789 size_t node_right = RightChildIndex(cur_ix); | 672 switch (type) { |
790 // The match length of the rightmost node of the left subtree of the new | 673 #define RESET_(N) case N: ResetH ## N(self->h ## N); break; |
791 // root, updated as we traverse and reroot the tree of the hash bucket. | 674 FOR_ALL_HASHERS(RESET_) |
792 size_t best_len_left = 0; | 675 #undef RESET_ |
793 // The match length of the leftmost node of the right subtree of the new | 676 default: break; |
794 // root, updated as we traverse and reroot the tree of the hash bucket. | 677 } |
795 size_t best_len_right = 0; | 678 } |
796 if (reroot_tree) { | 679 |
797 buckets_[key] = static_cast<uint32_t>(cur_ix); | 680 static BROTLI_INLINE void HashersSetup( |
798 } | 681 MemoryManager* m, Hashers* self, int type) { |
799 for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) { | 682 switch (type) { |
800 const size_t backward = cur_ix - prev_ix; | 683 #define SETUP_(N) case N: self->h ## N = BROTLI_ALLOC(m, H ## N, 1); break; |
801 const size_t prev_ix_masked = prev_ix & ring_buffer_mask; | 684 FOR_ALL_HASHERS(SETUP_) |
802 if (backward == 0 || backward > max_backward || depth_remaining == 0) { | 685 #undef SETUP_ |
803 if (reroot_tree) { | 686 default: break; |
804 forest_[node_left] = invalid_pos_; | 687 } |
805 forest_[node_right] = invalid_pos_; | 688 if (BROTLI_IS_OOM(m)) return; |
806 } | 689 if (type == 10) InitializeH10(self->h10); |
807 break; | 690 HashersReset(self, type); |
808 } | 691 } |
809 const size_t cur_len = std::min(best_len_left, best_len_right); | 692 |
810 const size_t len = cur_len + | 693 #define WARMUP_HASH_(N) \ |
811 FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len], | 694 static BROTLI_INLINE void WarmupHashH ## N(MemoryManager* m, \ |
812 &data[prev_ix_masked + cur_len], | 695 const BrotliEncoderParams* params, const size_t size, const uint8_t* dict, \ |
813 max_length - cur_len); | 696 H ## N* hasher) { \ |
814 if (len > *best_len) { | 697 size_t overlap = (StoreLookaheadH ## N()) - 1; \ |
815 *best_len = len; | 698 size_t i; \ |
816 if (matches) { | 699 InitH ## N(m, hasher, dict, params, 0, size, BROTLI_FALSE); \ |
817 *matches++ = BackwardMatch(backward, len); | 700 if (BROTLI_IS_OOM(m)) return; \ |
818 } | 701 for (i = 0; i + overlap < size; i++) { \ |
819 if (len >= max_comp_len) { | 702 StoreH ## N(hasher, dict, ~(size_t)0, i); \ |
820 if (reroot_tree) { | 703 } \ |
821 forest_[node_left] = forest_[LeftChildIndex(prev_ix)]; | 704 } |
822 forest_[node_right] = forest_[RightChildIndex(prev_ix)]; | 705 FOR_ALL_HASHERS(WARMUP_HASH_) |
823 } | 706 #undef WARMUP_HASH_ |
824 break; | 707 |
825 } | 708 /* Custom LZ77 window. */ |
826 } | 709 static BROTLI_INLINE void HashersPrependCustomDictionary( |
827 if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) { | 710 MemoryManager* m, Hashers* self, const BrotliEncoderParams* params, |
828 best_len_left = len; | 711 const size_t size, const uint8_t* dict) { |
829 if (reroot_tree) { | 712 int hasher_type = ChooseHasher(params); |
830 forest_[node_left] = static_cast<uint32_t>(prev_ix); | 713 switch (hasher_type) { |
831 } | 714 #define PREPEND_(N) \ |
832 node_left = RightChildIndex(prev_ix); | 715 case N: WarmupHashH ## N(m, params, size, dict, self->h ## N); break; |
833 prev_ix = forest_[node_left]; | 716 FOR_ALL_HASHERS(PREPEND_) |
834 } else { | 717 #undef PREPEND_ |
835 best_len_right = len; | 718 default: break; |
836 if (reroot_tree) { | 719 } |
837 forest_[node_right] = static_cast<uint32_t>(prev_ix); | 720 if (BROTLI_IS_OOM(m)) return; |
838 } | 721 } |
839 node_right = LeftChildIndex(prev_ix); | 722 |
840 prev_ix = forest_[node_right]; | 723 |
841 } | 724 #if defined(__cplusplus) || defined(c_plusplus) |
842 } | 725 } /* extern "C" */ |
843 return matches; | 726 #endif |
844 } | 727 |
845 | 728 #endif /* BROTLI_ENC_HASH_H_ */ |
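The C-style replacement (right column) exposes the hasher lifecycle through the macro-generated helpers it defines. The following is a minimal sketch of how a caller might drive that lifecycle, assuming a MemoryManager `m` and a populated BrotliEncoderParams `params` obtained elsewhere; the wrapper function name is illustrative and not part of the header:

static void ExampleHashersLifecycle(MemoryManager* m,
                                    const BrotliEncoderParams* params,
                                    size_t dict_size, const uint8_t* dict) {
  Hashers hashers;
  InitHashers(&hashers);                            /* zero every hasher pointer */
  HashersSetup(m, &hashers, ChooseHasher(params));  /* allocate + reset the chosen one */
  if (!BROTLI_IS_OOM(m) && dict_size != 0) {
    /* Warm the selected hasher up on a caller-supplied LZ77 dictionary. */
    HashersPrependCustomDictionary(m, &hashers, params, dict_size, dict);
  }
  /* ... the encoder would feed data through the selected hasher here ... */
  DestroyHashers(m, &hashers);                      /* frees whatever was allocated */
}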
846 inline size_t LeftChildIndex(const size_t pos) { | |
847 return 2 * (pos & window_mask_); | |
848 } | |
849 | |
850 inline size_t RightChildIndex(const size_t pos) { | |
851 return 2 * (pos & window_mask_) + 1; | |
852 } | |
853 | |
854 static uint32_t HashBytes(const uint8_t *data) { | |
855 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; | |
856 // The higher bits contain more mixture from the multiplication, | |
857 // so we take our results from there. | |
858 return h >> (32 - kBucketBits); | |
859 } | |
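The bucket-selection pattern in HashBytes is small enough to show in isolation. A self-contained sketch follows; the toy names and the particular odd multiplier are chosen for illustration and are not asserted to equal kHashMul32:

#include <stdint.h>
#include <string.h>

// Multiply the next 4 bytes by an odd constant and keep only the top 17 bits
// (kBucketBits above), where the multiplication has mixed the input the most.
static uint32_t ToyHashBytes(const uint8_t* data) {
  uint32_t v;
  memcpy(&v, data, sizeof(v));             // portable stand-in for the unaligned load
  const uint32_t kToyMul32 = 0x1e35a7bdu;  // odd multiplier, illustrative only
  return (v * kToyMul32) >> (32 - 17);     // bucket index in [0, 1 << 17)
}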
860 | |
861 static const int kBucketBits = 17; | |
862 static const size_t kBucketSize = 1 << kBucketBits; | |
863 | |
864 // The window size minus 1. |
865 size_t window_mask_; | |
866 | |
867 // Hash table that maps the 4-byte hashes of the sequence to the last | |
868 // position where this hash was found, which is the root of the binary | |
869 // tree of sequences that share this hash bucket. | |
870 uint32_t buckets_[kBucketSize]; | |
871 | |
872 // The union of the binary trees of each hash bucket. The root of the tree | |
873 // corresponding to a hash is a sequence starting at buckets_[hash] and | |
874 // the left and right children of a sequence starting at pos are | |
875 // forest_[2 * pos] and forest_[2 * pos + 1]. | |
876 uint32_t* forest_; | |
877 | |
878 // A position used to mark a non-existent sequence, i.e. a tree is empty if | |
879 // its root is at invalid_pos_ and a node is a leaf if both its children | |
880 // are at invalid_pos_. | |
881 uint32_t invalid_pos_; | |
882 | |
883 bool need_init_; | |
884 }; | |
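The buckets_/forest_ layout described in the member comments above can be illustrated with a small standalone structure. This is a sketch only: the names are local to the example, and the forest is sized as two slots per window position for simplicity.

#include <stdint.h>
#include <vector>

// Each window position owns two forest slots: its left child lives at
// forest[2 * (pos & mask)] and its right child at forest[2 * (pos & mask) + 1].
// A tree rooted at invalid_pos is empty; a node whose children are both
// invalid_pos is a leaf.
struct ToyTreeForest {
  size_t mask;                   // window size minus one
  uint32_t invalid_pos;          // marks "no node here"
  std::vector<uint32_t> forest;  // two entries per window position

  explicit ToyTreeForest(size_t window_size)
      : mask(window_size - 1),
        invalid_pos(0xffffffffu),
        forest(2 * window_size, 0xffffffffu) {}

  uint32_t& LeftChild(size_t pos) { return forest[2 * (pos & mask)]; }
  uint32_t& RightChild(size_t pos) { return forest[2 * (pos & mask) + 1]; }
  bool IsLeaf(size_t pos) {
    return LeftChild(pos) == invalid_pos && RightChild(pos) == invalid_pos;
  }
};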
885 | |
886 struct Hashers { | |
887 // For kBucketSweep == 1, enabling the dictionary lookup makes compression | |
888 // a little faster (0.5% - 1%) and it compresses 0.15% better on small text | |
889 // and html inputs. | |
890 typedef HashLongestMatchQuickly<16, 1, true> H2; | |
891 typedef HashLongestMatchQuickly<16, 2, false> H3; | |
892 typedef HashLongestMatchQuickly<17, 4, true> H4; | |
893 typedef HashLongestMatch<14, 4, 4> H5; | |
894 typedef HashLongestMatch<14, 5, 4> H6; | |
895 typedef HashLongestMatch<15, 6, 10> H7; | |
896 typedef HashLongestMatch<15, 7, 10> H8; | |
897 typedef HashLongestMatch<15, 8, 16> H9; | |
898 typedef HashToBinaryTree H10; | |
899 | |
900 Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0), | |
901 hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {} | |
902 | |
903 ~Hashers(void) { | |
904 delete hash_h2; | |
905 delete hash_h3; | |
906 delete hash_h4; | |
907 delete hash_h5; | |
908 delete hash_h6; | |
909 delete hash_h7; | |
910 delete hash_h8; | |
911 delete hash_h9; | |
912 delete hash_h10; | |
913 } | |
914 | |
915 void Init(int type) { | |
916 switch (type) { | |
917 case 2: hash_h2 = new H2; break; | |
918 case 3: hash_h3 = new H3; break; | |
919 case 4: hash_h4 = new H4; break; | |
920 case 5: hash_h5 = new H5; break; | |
921 case 6: hash_h6 = new H6; break; | |
922 case 7: hash_h7 = new H7; break; | |
923 case 8: hash_h8 = new H8; break; | |
924 case 9: hash_h9 = new H9; break; | |
925 case 10: hash_h10 = new H10; break; | |
926 default: break; | |
927 } | |
928 } | |
929 | |
930 template<typename Hasher> | |
931 void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) { | |
932 hasher->Init(); | |
933 for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) { | |
934 hasher->Store(&dict[i], static_cast<uint32_t>(i)); | |
935 } | |
936 } | |
937 | |
938 // Custom LZ77 window. | |
939 void PrependCustomDictionary( | |
940 int type, int lgwin, const size_t size, const uint8_t* dict) { | |
941 switch (type) { | |
942 case 2: WarmupHash(size, dict, hash_h2); break; | |
943 case 3: WarmupHash(size, dict, hash_h3); break; | |
944 case 4: WarmupHash(size, dict, hash_h4); break; | |
945 case 5: WarmupHash(size, dict, hash_h5); break; | |
946 case 6: WarmupHash(size, dict, hash_h6); break; | |
947 case 7: WarmupHash(size, dict, hash_h7); break; | |
948 case 8: WarmupHash(size, dict, hash_h8); break; | |
949 case 9: WarmupHash(size, dict, hash_h9); break; | |
950 case 10: | |
951 hash_h10->Init(lgwin, 0, size, false); | |
952 for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) { | |
953 hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i); | |
954 } | |
955 break; | |
956 default: break; | |
957 } | |
958 } | |
959 | |
960 | |
961 H2* hash_h2; | |
962 H3* hash_h3; | |
963 H4* hash_h4; | |
964 H5* hash_h5; | |
965 H6* hash_h6; | |
966 H7* hash_h7; | |
967 H8* hash_h8; | |
968 H9* hash_h9; | |
969 H10* hash_h10; | |
970 }; | |
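A minimal usage sketch for the C++ Hashers wrapper above: Init allocates only the hasher selected by `type`, and PrependCustomDictionary warms that hasher up on a caller-supplied dictionary. The wrapper function name below is illustrative and not part of this header:

static void SetupHashersWithDictionary(Hashers* hashers, int type, int lgwin,
                                       size_t dict_size, const uint8_t* dict) {
  hashers->Init(type);  // allocates hash_h<type>; all other members stay 0
  if (dict_size != 0) {
    hashers->PrependCustomDictionary(type, lgwin, dict_size, dict);
  }
  // The Hashers destructor deletes every member pointer; the ones that were
  // never allocated are still 0, so deleting them is a no-op.
}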
971 | |
972 } // namespace brotli | |
973 | |
974 #endif // BROTLI_ENC_HASH_H_ | |