OLD | NEW |
1 /* Copyright 2010 Google Inc. All Rights Reserved. | 1 /* Copyright 2010 Google Inc. All Rights Reserved. |
2 | 2 |
3 Distributed under MIT license. | 3 Distributed under MIT license. |
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 */ | 5 */ |
6 | 6 |
7 // A (forgetful) hash table to the data seen by the compressor, to | 7 /* A (forgetful) hash table to the data seen by the compressor, to |
8 // help create backward references to previous data. | 8 help create backward references to previous data. */ |
9 | 9 |
10 #ifndef BROTLI_ENC_HASH_H_ | 10 #ifndef BROTLI_ENC_HASH_H_ |
11 #define BROTLI_ENC_HASH_H_ | 11 #define BROTLI_ENC_HASH_H_ |
12 | 12 |
13 #include <sys/types.h> | 13 #include <string.h> /* memcmp, memset */ |
14 #include <algorithm> | |
15 #include <cstring> | |
16 #include <limits> | |
17 | 14 |
| 15 #include "../common/constants.h" |
| 16 #include "../common/dictionary.h" |
| 17 #include <brotli/types.h> |
18 #include "./dictionary_hash.h" | 18 #include "./dictionary_hash.h" |
19 #include "./fast_log.h" | 19 #include "./fast_log.h" |
20 #include "./find_match_length.h" | 20 #include "./find_match_length.h" |
| 21 #include "./memory.h" |
21 #include "./port.h" | 22 #include "./port.h" |
22 #include "./prefix.h" | 23 #include "./quality.h" |
23 #include "./static_dict.h" | 24 #include "./static_dict.h" |
24 #include "./transform.h" | |
25 #include "./types.h" | |
26 | 25 |
27 namespace brotli { | 26 #if defined(__cplusplus) || defined(c_plusplus) |
| 27 extern "C" { |
| 28 #endif |
28 | 29 |
29 static const size_t kMaxTreeSearchDepth = 64; | 30 #define MAX_TREE_SEARCH_DEPTH 64 |
30 static const size_t kMaxTreeCompLength = 128; | 31 #define MAX_TREE_COMP_LENGTH 128 |
| 32 #define score_t size_t |
31 | 33 |
32 static const uint32_t kDistanceCacheIndex[] = { | 34 static const uint32_t kDistanceCacheIndex[] = { |
33 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, | 35 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, |
34 }; | 36 }; |
35 static const int kDistanceCacheOffset[] = { | 37 static const int kDistanceCacheOffset[] = { |
36 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3 | 38 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3 |
37 }; | 39 }; |
38 | 40 |
39 static const uint32_t kCutoffTransformsCount = 10; | 41 static const uint32_t kCutoffTransformsCount = 10; |
40 static const uint8_t kCutoffTransforms[] = { | 42 static const uint8_t kCutoffTransforms[] = { |
41 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 | 43 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 |
42 }; | 44 }; |
43 | 45 |
44 // kHashMul32 multiplier has these properties: | 46 typedef struct HasherSearchResult { |
45 // * The multiplier must be odd. Otherwise we may lose the highest bit. | 47 size_t len; |
46 // * No long streaks of 1s or 0s. | 48 size_t len_x_code; /* == len ^ len_code */ |
47 // * There is no effort to ensure that it is a prime, the oddity is enough | 49 size_t distance; |
48 // for this use. | 50 score_t score; |
49 // * The number has been tuned heuristically against compression benchmarks. | 51 } HasherSearchResult; |
| 52 |
| 53 typedef struct DictionarySearchStatictics { |
| 54 size_t num_lookups; |
| 55 size_t num_matches; |
| 56 } DictionarySearchStatictics; |
| 57 |
| 58 /* kHashMul32 multiplier has these properties: |
| 59 * The multiplier must be odd. Otherwise we may lose the highest bit. |
| 60 * No long streaks of ones or zeros. |
| 61 * There is no effort to ensure that it is a prime, the oddity is enough |
| 62 for this use. |
| 63 * The number has been tuned heuristically against compression benchmarks. */ |
50 static const uint32_t kHashMul32 = 0x1e35a7bd; | 64 static const uint32_t kHashMul32 = 0x1e35a7bd; |
51 | 65 |
52 template<int kShiftBits> | 66 static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { |
53 inline uint32_t Hash(const uint8_t *data) { | |
54 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; | 67 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; |
55 // The higher bits contain more mixture from the multiplication, | 68 /* The higher bits contain more mixture from the multiplication, |
56 // so we take our results from there. | 69 so we take our results from there. */ |
57 return h >> (32 - kShiftBits); | 70 return h >> (32 - 14); |
58 } | 71 } |
59 | 72 |
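The new Hash14 keeps the same multiply-and-shift scheme as the old templated Hash<kShiftBits>, only with the shift hard-coded to 14 bits. A minimal sketch of what it computes (hypothetical helper, not part of either side of the diff; it assumes BROTLI_UNALIGNED_LOAD32 behaves like a plain native-endian 32-bit load):

  #include <stdint.h>
  #include <string.h>

  /* Read 4 bytes, multiply by the odd constant, keep the top 14 bits of the
     low 32-bit product; the high bits carry the most mixing. */
  static uint32_t Hash14Sketch(const uint8_t* data) {
    uint32_t word;
    memcpy(&word, data, sizeof(word));  /* stand-in for BROTLI_UNALIGNED_LOAD32 */
    return (word * 0x1e35a7bdU) >> (32 - 14);  /* result in [0, 1 << 14) */
  }

Because kHashMul32 is odd, multiplication modulo 2^32 is invertible, so no input bits are lost before the final shift.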
60 // Usually, we always choose the longest backward reference. This function | 73 #define BROTLI_LITERAL_BYTE_SCORE 540 |
61 // allows for the exception of that rule. | 74 #define BROTLI_DISTANCE_BIT_PENALTY 120 |
62 // | 75 /* Score must be positive after applying maximal penalty. */ |
63 // If we choose a backward reference that is further away, it will | 76 #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t)) |
64 // usually be coded with more bits. We approximate this by assuming | 77 |
65 // log2(distance). If the distance can be expressed in terms of the | 78 /* Usually, we always choose the longest backward reference. This function |
66 // last four distances, we use some heuristic constants to estimate | 79 allows for the exception of that rule. |
67 // the bits cost. For the first up to four literals we use the bit | 80 |
68 // cost of the literals from the literal cost model, after that we | 81 If we choose a backward reference that is further away, it will |
69 // use the average bit cost of the cost model. | 82 usually be coded with more bits. We approximate this by assuming |
70 // | 83 log2(distance). If the distance can be expressed in terms of the |
71 // This function is used to sometimes discard a longer backward reference | 84 last four distances, we use some heuristic constants to estimate |
72 // when it is not much longer and the bit cost for encoding it is more | 85 the bits cost. For the first up to four literals we use the bit |
73 // than the saved literals. | 86 cost of the literals from the literal cost model, after that we |
74 // | 87 use the average bit cost of the cost model. |
75 // backward_reference_offset MUST be positive. | 88 |
76 inline double BackwardReferenceScore(size_t copy_length, | 89 This function is used to sometimes discard a longer backward reference |
77 size_t backward_reference_offset) { | 90 when it is not much longer and the bit cost for encoding it is more |
78 return 5.4 * static_cast<double>(copy_length) - | 91 than the saved literals. |
79 1.20 * Log2FloorNonZero(backward_reference_offset); | 92 |
80 } | 93 backward_reference_offset MUST be positive. */ |
81 | 94 static BROTLI_INLINE score_t BackwardReferenceScore( |
82 inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length, | 95 size_t copy_length, size_t backward_reference_offset) { |
83 size_t distance_short_code) { | 96 return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length - |
84 static const double kDistanceShortCodeBitCost[16] = { | 97 BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset); |
85 -0.6, 0.95, 1.17, 1.27, | 98 } |
86 0.93, 0.93, 0.96, 0.96, 0.99, 0.99, | 99 |
87 1.05, 1.05, 1.15, 1.15, 1.25, 1.25 | 100 static const score_t kDistanceShortCodeCost[BROTLI_NUM_DISTANCE_SHORT_CODES] = { |
88 }; | 101 /* Repeat last */ |
89 return 5.4 * static_cast<double>(copy_length) - | 102 BROTLI_SCORE_BASE + 60, |
90 kDistanceShortCodeBitCost[distance_short_code]; | 103 /* 2nd, 3rd, 4th last */ |
91 } | 104 BROTLI_SCORE_BASE - 95, |
92 | 105 BROTLI_SCORE_BASE - 117, |
93 struct BackwardMatch { | 106 BROTLI_SCORE_BASE - 127, |
94 BackwardMatch(void) : distance(0), length_and_code(0) {} | 107 /* Last with offset */ |
95 | 108 BROTLI_SCORE_BASE - 93, |
96 BackwardMatch(size_t dist, size_t len) | 109 BROTLI_SCORE_BASE - 93, |
97 : distance(static_cast<uint32_t>(dist)) | 110 BROTLI_SCORE_BASE - 96, |
98 , length_and_code(static_cast<uint32_t>(len << 5)) {} | 111 BROTLI_SCORE_BASE - 96, |
99 | 112 BROTLI_SCORE_BASE - 99, |
100 BackwardMatch(size_t dist, size_t len, size_t len_code) | 113 BROTLI_SCORE_BASE - 99, |
101 : distance(static_cast<uint32_t>(dist)) | 114 /* 2nd last with offset */ |
102 , length_and_code(static_cast<uint32_t>( | 115 BROTLI_SCORE_BASE - 105, |
103 (len << 5) | (len == len_code ? 0 : len_code))) {} | 116 BROTLI_SCORE_BASE - 105, |
104 | 117 BROTLI_SCORE_BASE - 115, |
105 size_t length(void) const { | 118 BROTLI_SCORE_BASE - 115, |
106 return length_and_code >> 5; | 119 BROTLI_SCORE_BASE - 125, |
107 } | 120 BROTLI_SCORE_BASE - 125 |
108 size_t length_code(void) const { | 121 }; |
109 size_t code = length_and_code & 31; | 122 |
110 return code ? code : length(); | 123 static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance( |
111 } | 124 size_t copy_length, size_t distance_short_code) { |
112 | 125 return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length + |
| 126 kDistanceShortCodeCost[distance_short_code]; |
| 127 } |
| 128 |
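The floating-point scoring on the left becomes pure integer arithmetic on the right: BROTLI_SCORE_BASE is 120 * 8 * sizeof(size_t) (7680 with a 64-bit size_t), each copied byte is worth 540 points, and each distance bit costs 120. A small sketch of the trade-off this encodes (hypothetical helper and example values, not part of the diff):

  #include <stddef.h>

  /* Same formula as BackwardReferenceScore above, with log2(distance) passed in. */
  static size_t ScoreSketch(size_t copy_length, size_t log2_distance) {
    return 120 * 8 * sizeof(size_t) + 540 * copy_length - 120 * log2_distance;
  }

  /* With a 64-bit size_t:
       ScoreSketch(6, 16) == 7680 + 3240 - 1920 ==  9000   (6 bytes, ~64 KiB back)
       ScoreSketch(5,  2) == 7680 + 2700 -  240 == 10140   (5 bytes, 4 bytes back)
     so the shorter but much closer match wins. */

As the comment in the diff notes, the BROTLI_SCORE_BASE offset keeps the score positive even under the maximal distance penalty.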
| 129 static BROTLI_INLINE void DictionarySearchStaticticsReset( |
| 130 DictionarySearchStatictics* self) { |
| 131 self->num_lookups = 0; |
| 132 self->num_matches = 0; |
| 133 } |
| 134 |
| 135 static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( |
| 136 size_t item, const uint8_t* data, size_t max_length, size_t max_backward, |
| 137 HasherSearchResult* out) { |
| 138 size_t len; |
| 139 size_t dist; |
| 140 size_t offset; |
| 141 size_t matchlen; |
| 142 size_t backward; |
| 143 score_t score; |
| 144 len = item & 31; |
| 145 dist = item >> 5; |
| 146 offset = kBrotliDictionaryOffsetsByLength[len] + len * dist; |
| 147 if (len > max_length) { |
| 148 return BROTLI_FALSE; |
| 149 } |
| 150 |
| 151 matchlen = FindMatchLengthWithLimit(data, &kBrotliDictionary[offset], len); |
| 152 if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) { |
| 153 return BROTLI_FALSE; |
| 154 } |
| 155 { |
| 156 size_t transform_id = kCutoffTransforms[len - matchlen]; |
| 157 backward = max_backward + dist + 1 + |
| 158 (transform_id << kBrotliDictionarySizeBitsByLength[len]); |
| 159 } |
| 160 score = BackwardReferenceScore(matchlen, backward); |
| 161 if (score < out->score) { |
| 162 return BROTLI_FALSE; |
| 163 } |
| 164 out->len = matchlen; |
| 165 out->len_x_code = len ^ matchlen; |
| 166 out->distance = backward; |
| 167 out->score = score; |
| 168 return BROTLI_TRUE; |
| 169 } |
| 170 |
| 171 static BROTLI_INLINE BROTLI_BOOL SearchInStaticDictionary( |
| 172 DictionarySearchStatictics* self, const uint8_t* data, size_t max_length, |
| 173 size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) { |
| 174 size_t key; |
| 175 size_t i; |
| 176 BROTLI_BOOL is_match_found = BROTLI_FALSE; |
| 177 if (self->num_matches < (self->num_lookups >> 7)) { |
| 178 return BROTLI_FALSE; |
| 179 } |
| 180 key = Hash14(data) << 1; |
| 181 for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) { |
| 182 size_t item = kStaticDictionaryHash[key]; |
| 183 self->num_lookups++; |
| 184 if (item != 0 && |
| 185 TestStaticDictionaryItem(item, data, max_length, max_backward, out)) { |
| 186 self->num_matches++; |
| 187 is_match_found = BROTLI_TRUE; |
| 188 } |
| 189 } |
| 190 return is_match_found; |
| 191 } |
| 192 |
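SearchInStaticDictionary and TestStaticDictionaryItem above depend on two conventions: how a kStaticDictionaryHash entry is packed, and how a dictionary word is expressed as a distance just beyond the regular window. A rough sketch of the packing (hypothetical helper name; the real tables live in the dictionary sources):

  /* A non-zero hash item packs a word length (low 5 bits) and a word index. */
  static void UnpackDictionaryItemSketch(size_t item,
                                         size_t* len, size_t* word_index) {
    *len = item & 31;
    *word_index = item >> 5;
  }

The word's bytes then start at kBrotliDictionaryOffsetsByLength[len] + len * word_index. When only matchlen of the len bytes match, kCutoffTransforms[len - matchlen] selects a transform whose id is folded into the distance, which is why the resulting backward value deliberately lands beyond max_backward.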
| 193 typedef struct BackwardMatch { |
113 uint32_t distance; | 194 uint32_t distance; |
114 uint32_t length_and_code; | 195 uint32_t length_and_code; |
115 }; | 196 } BackwardMatch; |
116 | 197 |
117 // A (forgetful) hash table to the data seen by the compressor, to | 198 static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self, |
118 // help create backward references to previous data. | 199 size_t dist, size_t len) { |
119 // | 200 self->distance = (uint32_t)dist; |
120 // This is a hash map of fixed size (kBucketSize). Starting from the | 201 self->length_and_code = (uint32_t)(len << 5); |
121 // given index, kBucketSweep buckets are used to store values of a key. | 202 } |
122 template <int kBucketBits, int kBucketSweep, bool kUseDictionary> | 203 |
123 class HashLongestMatchQuickly { | 204 static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self, |
124 public: | 205 size_t dist, size_t len, size_t len_code) { |
125 HashLongestMatchQuickly(void) { | 206 self->distance = (uint32_t)dist; |
126 Reset(); | 207 self->length_and_code = |
127 } | 208 (uint32_t)((len << 5) | (len == len_code ? 0 : len_code)); |
128 void Reset(void) { | 209 } |
129 need_init_ = true; | 210 |
130 num_dict_lookups_ = 0; | 211 static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) { |
131 num_dict_matches_ = 0; | 212 return self->length_and_code >> 5; |
132 } | 213 } |
133 void Init(void) { | 214 |
134 if (need_init_) { | 215 static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { |
135 // It is not strictly necessary to fill this buffer here, but | 216 size_t code = self->length_and_code & 31; |
136 // not filling will make the results of the compression stochastic | 217 return code ? code : BackwardMatchLength(self); |
137 // (but correct). This is because random data would cause the | 218 } |
138 // system to find accidentally good backward references here and there. | 219 |
139 memset(&buckets_[0], 0, sizeof(buckets_)); | 220 #define EXPAND_CAT(a, b) CAT(a, b) |
140 need_init_ = false; | 221 #define CAT(a, b) a ## b |
141 } | 222 #define FN(X) EXPAND_CAT(X, HASHER()) |
142 } | 223 |
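The EXPAND_CAT / CAT / FN trio is what lets a single body of hasher code be stamped out once per hasher: while HASHER() is defined as H10, FN(Store) expands to StoreH10; redefine HASHER() as H5 and the same include produces StoreH5. A standalone illustration of the pattern (hypothetical names, standard C preprocessor behavior):

  #define EXPAND_CAT(a, b) CAT(a, b)
  #define CAT(a, b) a ## b
  #define HASHER() H10
  #define FN(X) EXPAND_CAT(X, HASHER())

  static int FN(Demo)(void) { return 0; }  /* expands to: static int DemoH10(void) */

The extra EXPAND_CAT level matters because the operands of ## are not macro-expanded; routing the paste through a second macro forces HASHER() to be replaced first.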
143 void InitForData(const uint8_t* data, size_t num) { | 224 #define MAX_NUM_MATCHES_H10 (64 + MAX_TREE_SEARCH_DEPTH) |
144 for (size_t i = 0; i < num; ++i) { | 225 |
145 const uint32_t key = HashBytes(&data[i]); | 226 #define HASHER() H10 |
146 memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0])); | 227 #define HashToBinaryTree HASHER() |
147 need_init_ = false; | 228 |
148 } | 229 #define BUCKET_BITS 17 |
149 } | 230 #define BUCKET_SIZE (1 << BUCKET_BITS) |
150 // Look at 4 bytes at data. | 231 |
151 // Compute a hash from these, and store the value somewhere within | 232 static size_t FN(HashTypeLength)(void) { return 4; } |
152 // [ix .. ix+3]. | 233 static size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; } |
153 inline void Store(const uint8_t *data, const uint32_t ix) { | 234 |
154 const uint32_t key = HashBytes(data); | 235 static uint32_t FN(HashBytes)(const uint8_t *data) { |
155 // Wiggle the value with the bucket sweep range. | 236 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; |
156 const uint32_t off = (ix >> 3) % kBucketSweep; | 237 /* The higher bits contain more mixture from the multiplication, |
157 buckets_[key + off] = ix; | 238 so we take our results from there. */ |
158 } | 239 return h >> (32 - BUCKET_BITS); |
159 | 240 } |
160 // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask] | 241 |
161 // up to the length of max_length and stores the position cur_ix in the | 242 /* A (forgetful) hash table where each hash bucket contains a binary tree of |
162 // hash table. | 243 sequences whose first 4 bytes share the same hash code. |
163 // | 244 Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting |
164 // Does not look for matches longer than max_length. | 245 position in the input data. The binary tree is sorted by the lexicographic |
165 // Does not look for matches further away than max_backward. | 246 order of the sequences, and it is also a max-heap with respect to the |
166 // Writes the best found match length into best_len_out. | 247 starting positions. */ |
167 // Writes the index (&data[index]) of the start of the best match into | 248 typedef struct HashToBinaryTree { |
168 // best_distance_out. | 249 /* The window size minus 1 */ |
169 inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer, | 250 size_t window_mask_; |
170 const size_t ring_buffer_mask, | 251 |
171 const int* __restrict distance_cache, | 252 /* Hash table that maps the 4-byte hashes of the sequence to the last |
172 const size_t cur_ix, | 253 position where this hash was found, which is the root of the binary |
173 const size_t max_length, | 254 tree of sequences that share this hash bucket. */ |
174 const size_t max_backward, | 255 uint32_t buckets_[BUCKET_SIZE]; |
175 size_t * __restrict best_len_out, | 256 |
176 size_t * __restrict best_len_code_out, | 257 /* The union of the binary trees of each hash bucket. The root of the tree |
177 size_t * __restrict best_distance_out, | 258 corresponding to a hash is a sequence starting at buckets_[hash] and |
178 double* __restrict best_score_out) { | 259 the left and right children of a sequence starting at pos are |
179 const size_t best_len_in = *best_len_out; | 260 forest_[2 * pos] and forest_[2 * pos + 1]. */ |
180 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; | 261 uint32_t* forest_; |
181 const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]); | 262 |
182 int compare_char = ring_buffer[cur_ix_masked + best_len_in]; | 263 /* A position used to mark a non-existent sequence, i.e. a tree is empty if |
183 double best_score = *best_score_out; | 264 its root is at invalid_pos_ and a node is a leaf if both its children |
184 size_t best_len = best_len_in; | 265 are at invalid_pos_. */ |
185 size_t cached_backward = static_cast<size_t>(distance_cache[0]); | 266 uint32_t invalid_pos_; |
186 size_t prev_ix = cur_ix - cached_backward; | 267 |
187 bool match_found = false; | 268 size_t forest_size_; |
188 if (prev_ix < cur_ix) { | 269 BROTLI_BOOL is_dirty_; |
189 prev_ix &= static_cast<uint32_t>(ring_buffer_mask); | 270 } HashToBinaryTree; |
190 if (compare_char == ring_buffer[prev_ix + best_len]) { | 271 |
191 size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix], | 272 static void FN(Reset)(HashToBinaryTree* self) { |
192 &ring_buffer[cur_ix_masked], | 273 self->is_dirty_ = BROTLI_TRUE; |
193 max_length); | 274 } |
194 if (len >= 4) { | 275 |
195 best_score = BackwardReferenceScoreUsingLastDistance(len, 0); | 276 static void FN(Initialize)(HashToBinaryTree* self) { |
196 best_len = len; | 277 self->forest_ = NULL; |
197 *best_len_out = len; | 278 self->forest_size_ = 0; |
198 *best_len_code_out = len; | 279 FN(Reset)(self); |
199 *best_distance_out = cached_backward; | 280 } |
200 *best_score_out = best_score; | 281 |
201 compare_char = ring_buffer[cur_ix_masked + best_len]; | 282 static void FN(Cleanup)(MemoryManager* m, HashToBinaryTree* self) { |
202 if (kBucketSweep == 1) { | 283 BROTLI_FREE(m, self->forest_); |
203 buckets_[key] = static_cast<uint32_t>(cur_ix); | 284 } |
204 return true; | 285 |
205 } else { | 286 static void FN(Init)( |
206 match_found = true; | 287 MemoryManager* m, HashToBinaryTree* self, const uint8_t* data, |
207 } | 288 const BrotliEncoderParams* params, size_t position, size_t bytes, |
| 289 BROTLI_BOOL is_last) { |
| 290 if (self->is_dirty_) { |
| 291 uint32_t invalid_pos; |
| 292 size_t num_nodes; |
| 293 uint32_t i; |
| 294 BROTLI_UNUSED(data); |
| 295 self->window_mask_ = (1u << params->lgwin) - 1u; |
| 296 invalid_pos = (uint32_t)(0 - self->window_mask_); |
| 297 self->invalid_pos_ = invalid_pos; |
| 298 for (i = 0; i < BUCKET_SIZE; i++) { |
| 299 self->buckets_[i] = invalid_pos; |
| 300 } |
| 301 num_nodes = (position == 0 && is_last) ? bytes : self->window_mask_ + 1; |
| 302 if (num_nodes > self->forest_size_) { |
| 303 BROTLI_FREE(m, self->forest_); |
| 304 self->forest_ = BROTLI_ALLOC(m, uint32_t, 2 * num_nodes); |
| 305 if (BROTLI_IS_OOM(m)) return; |
| 306 self->forest_size_ = num_nodes; |
| 307 } |
| 308 self->is_dirty_ = BROTLI_FALSE; |
| 309 } |
| 310 } |
| 311 |
| 312 static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self, |
| 313 const size_t pos) { |
| 314 return 2 * (pos & self->window_mask_); |
| 315 } |
| 316 |
| 317 static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self, |
| 318 const size_t pos) { |
| 319 return 2 * (pos & self->window_mask_) + 1; |
| 320 } |
| 321 |
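Per the struct comments above, every window position owns the two forest_ slots returned by FN(LeftChildIndex) and FN(RightChildIndex), and invalid_pos_ marks a missing child or an empty bucket. A small sketch of those conventions (hypothetical helpers, not part of the header):

  /* A hash bucket's tree is empty when its root is the invalid position. */
  static BROTLI_BOOL BucketIsEmptySketch(HashToBinaryTree* self, uint32_t key) {
    return TO_BROTLI_BOOL(self->buckets_[key] == self->invalid_pos_);
  }

  /* A stored sequence is a leaf when both of its child slots are invalid. */
  static BROTLI_BOOL IsLeafSketch(HashToBinaryTree* self, size_t pos) {
    return TO_BROTLI_BOOL(
        self->forest_[FN(LeftChildIndex)(self, pos)] == self->invalid_pos_ &&
        self->forest_[FN(RightChildIndex)(self, pos)] == self->invalid_pos_);
  }

FN(StoreAndFindMatches) below maintains these invariants when it re-roots a bucket's tree at the current position.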
| 322 /* Stores the hash of the next 4 bytes and in a single tree-traversal, the |
| 323 hash bucket's binary tree is searched for matches and is re-rooted at the |
| 324 current position. |
| 325 |
| 326 If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the |
| 327 current position is searched for matches, but the state of the hash table |
| 328 is not changed, since we can not know the final sorting order of the |
| 329 current (incomplete) sequence. |
| 330 |
| 331 This function must be called with increasing cur_ix positions. */ |
| 332 static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)( |
| 333 HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data, |
| 334 const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length, |
| 335 const size_t max_backward, size_t* const BROTLI_RESTRICT best_len, |
| 336 BackwardMatch* BROTLI_RESTRICT matches) { |
| 337 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; |
| 338 const size_t max_comp_len = |
| 339 BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH); |
| 340 const BROTLI_BOOL should_reroot_tree = |
| 341 TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH); |
| 342 const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]); |
| 343 size_t prev_ix = self->buckets_[key]; |
| 344 /* The forest index of the rightmost node of the left subtree of the new |
| 345 root, updated as we traverse and re-root the tree of the hash bucket. */ |
| 346 size_t node_left = FN(LeftChildIndex)(self, cur_ix); |
| 347 /* The forest index of the leftmost node of the right subtree of the new |
| 348 root, updated as we traverse and re-root the tree of the hash bucket. */ |
| 349 size_t node_right = FN(RightChildIndex)(self, cur_ix); |
| 350 /* The match length of the rightmost node of the left subtree of the new |
| 351 root, updated as we traverse and re-root the tree of the hash bucket. */ |
| 352 size_t best_len_left = 0; |
| 353 /* The match length of the leftmost node of the right subtree of the new |
| 354 root, updated as we traverse and re-root the tree of the hash bucket. */ |
| 355 size_t best_len_right = 0; |
| 356 size_t depth_remaining; |
| 357 if (should_reroot_tree) { |
| 358 self->buckets_[key] = (uint32_t)cur_ix; |
| 359 } |
| 360 for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) { |
| 361 const size_t backward = cur_ix - prev_ix; |
| 362 const size_t prev_ix_masked = prev_ix & ring_buffer_mask; |
| 363 if (backward == 0 || backward > max_backward || depth_remaining == 0) { |
| 364 if (should_reroot_tree) { |
| 365 self->forest_[node_left] = self->invalid_pos_; |
| 366 self->forest_[node_right] = self->invalid_pos_; |
| 367 } |
| 368 break; |
| 369 } |
| 370 { |
| 371 const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right); |
| 372 size_t len; |
| 373 assert(cur_len <= MAX_TREE_COMP_LENGTH); |
| 374 len = cur_len + |
| 375 FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len], |
| 376 &data[prev_ix_masked + cur_len], |
| 377 max_length - cur_len); |
| 378 assert(0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len)); |
| 379 if (matches && len > *best_len) { |
| 380 *best_len = len; |
| 381 InitBackwardMatch(matches++, backward, len); |
| 382 } |
| 383 if (len >= max_comp_len) { |
| 384 if (should_reroot_tree) { |
| 385 self->forest_[node_left] = |
| 386 self->forest_[FN(LeftChildIndex)(self, prev_ix)]; |
| 387 self->forest_[node_right] = |
| 388 self->forest_[FN(RightChildIndex)(self, prev_ix)]; |
208 } | 389 } |
209 } | |
210 } | |
211 if (kBucketSweep == 1) { | |
212 // Only one to look for, don't bother to prepare for a loop. | |
213 prev_ix = buckets_[key]; | |
214 buckets_[key] = static_cast<uint32_t>(cur_ix); | |
215 size_t backward = cur_ix - prev_ix; | |
216 prev_ix &= static_cast<uint32_t>(ring_buffer_mask); | |
217 if (compare_char != ring_buffer[prev_ix + best_len_in]) { | |
218 return false; | |
219 } | |
220 if (PREDICT_FALSE(backward == 0 || backward > max_backward)) { | |
221 return false; | |
222 } | |
223 const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix], | |
224 &ring_buffer[cur_ix_masked], | |
225 max_length); | |
226 if (len >= 4) { | |
227 *best_len_out = len; | |
228 *best_len_code_out = len; | |
229 *best_distance_out = backward; | |
230 *best_score_out = BackwardReferenceScore(len, backward); | |
231 return true; | |
232 } | |
233 } else { | |
234 uint32_t *bucket = buckets_ + key; | |
235 prev_ix = *bucket++; | |
236 for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) { | |
237 const size_t backward = cur_ix - prev_ix; | |
238 prev_ix &= static_cast<uint32_t>(ring_buffer_mask); | |
239 if (compare_char != ring_buffer[prev_ix + best_len]) { | |
240 continue; | |
241 } | |
242 if (PREDICT_FALSE(backward == 0 || backward > max_backward)) { | |
243 continue; | |
244 } | |
245 const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix], | |
246 &ring_buffer[cur_ix_masked], | |
247 max_length); | |
248 if (len >= 4) { | |
249 const double score = BackwardReferenceScore(len, backward); | |
250 if (best_score < score) { | |
251 best_score = score; | |
252 best_len = len; | |
253 *best_len_out = best_len; | |
254 *best_len_code_out = best_len; | |
255 *best_distance_out = backward; | |
256 *best_score_out = score; | |
257 compare_char = ring_buffer[cur_ix_masked + best_len]; | |
258 match_found = true; | |
259 } | |
260 } | |
261 } | |
262 } | |
263 if (kUseDictionary && !match_found && | |
264 num_dict_matches_ >= (num_dict_lookups_ >> 7)) { | |
265 ++num_dict_lookups_; | |
266 const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1; | |
267 const uint16_t v = kStaticDictionaryHash[dict_key]; | |
268 if (v > 0) { | |
269 const uint32_t len = v & 31; | |
270 const uint32_t dist = v >> 5; | |
271 const size_t offset = | |
272 kBrotliDictionaryOffsetsByLength[len] + len * dist; | |
273 if (len <= max_length) { | |
274 const size_t matchlen = | |
275 FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked], | |
276 &kBrotliDictionary[offset], len); | |
277 if (matchlen + kCutoffTransformsCount > len && matchlen > 0) { | |
278 const size_t transform_id = kCutoffTransforms[len - matchlen]; | |
279 const size_t word_id = | |
280 transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) + | |
281 dist; | |
282 const size_t backward = max_backward + word_id + 1; | |
283 const double score = BackwardReferenceScore(matchlen, backward); | |
284 if (best_score < score) { | |
285 ++num_dict_matches_; | |
286 best_score = score; | |
287 best_len = matchlen; | |
288 *best_len_out = best_len; | |
289 *best_len_code_out = len; | |
290 *best_distance_out = backward; | |
291 *best_score_out = best_score; | |
292 match_found = true; | |
293 } | |
294 } | |
295 } | |
296 } | |
297 } | |
298 const uint32_t off = (cur_ix >> 3) % kBucketSweep; | |
299 buckets_[key + off] = static_cast<uint32_t>(cur_ix); | |
300 return match_found; | |
301 } | |
302 | |
303 enum { kHashLength = 5 }; | |
304 enum { kHashTypeLength = 8 }; | |
305 // HashBytes is the function that chooses the bucket to place | |
306 // the address in. The HashLongestMatch and HashLongestMatchQuickly | |
307 // classes have separate, different implementations of hashing. | |
308 static uint32_t HashBytes(const uint8_t *data) { | |
309 // Computing a hash based on 5 bytes works much better for | |
310 // qualities 1 and 3, where the next hash value is likely to replace | |
311 uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32; | |
312 // The higher bits contain more mixture from the multiplication, | |
313 // so we take our results from there. | |
314 return static_cast<uint32_t>(h >> (64 - kBucketBits)); | |
315 } | |
316 | |
317 enum { kHashMapSize = 4 << kBucketBits }; | |
318 | |
319 private: | |
320 static const uint32_t kBucketSize = 1 << kBucketBits; | |
321 uint32_t buckets_[kBucketSize + kBucketSweep]; | |
322 // True if buckets_ array needs to be initialized. | |
323 bool need_init_; | |
324 size_t num_dict_lookups_; | |
325 size_t num_dict_matches_; | |
326 }; | |
327 | |
328 // A (forgetful) hash table to the data seen by the compressor, to | |
329 // help create backward references to previous data. | |
330 // | |
331 // This is a hash map of fixed size (kBucketSize) to a ring buffer of | |
332 // fixed size (kBlockSize). The ring buffer contains the last kBlockSize | |
333 // index positions of the given hash key in the compressed data. | |
334 template <int kBucketBits, | |
335 int kBlockBits, | |
336 int kNumLastDistancesToCheck> | |
337 class HashLongestMatch { | |
338 public: | |
339 HashLongestMatch(void) { | |
340 Reset(); | |
341 } | |
342 | |
343 void Reset(void) { | |
344 need_init_ = true; | |
345 num_dict_lookups_ = 0; | |
346 num_dict_matches_ = 0; | |
347 } | |
348 | |
349 void Init(void) { | |
350 if (need_init_) { | |
351 memset(&num_[0], 0, sizeof(num_)); | |
352 need_init_ = false; | |
353 } | |
354 } | |
355 | |
356 void InitForData(const uint8_t* data, size_t num) { | |
357 for (size_t i = 0; i < num; ++i) { | |
358 const uint32_t key = HashBytes(&data[i]); | |
359 num_[key] = 0; | |
360 need_init_ = false; | |
361 } | |
362 } | |
363 | |
364 // Look at 3 bytes at data. | |
365 // Compute a hash from these, and store the value of ix at that position. | |
366 inline void Store(const uint8_t *data, const uint32_t ix) { | |
367 const uint32_t key = HashBytes(data); | |
368 const int minor_ix = num_[key] & kBlockMask; | |
369 buckets_[key][minor_ix] = ix; | |
370 ++num_[key]; | |
371 } | |
372 | |
373 // Find a longest backward match of &data[cur_ix] up to the length of | |
374 // max_length and stores the position cur_ix in the hash table. | |
375 // | |
376 // Does not look for matches longer than max_length. | |
377 // Does not look for matches further away than max_backward. | |
378 // Writes the best found match length into best_len_out. | |
379 // Writes the index (&data[index]) offset from the start of the best match | |
380 // into best_distance_out. | |
381 // Write the score of the best match into best_score_out. | |
382 bool FindLongestMatch(const uint8_t * __restrict data, | |
383 const size_t ring_buffer_mask, | |
384 const int* __restrict distance_cache, | |
385 const size_t cur_ix, | |
386 const size_t max_length, | |
387 const size_t max_backward, | |
388 size_t * __restrict best_len_out, | |
389 size_t * __restrict best_len_code_out, | |
390 size_t * __restrict best_distance_out, | |
391 double * __restrict best_score_out) { | |
392 *best_len_code_out = 0; | |
393 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; | |
394 bool match_found = false; | |
395 // Don't accept a short copy from far away. | |
396 double best_score = *best_score_out; | |
397 size_t best_len = *best_len_out; | |
398 *best_len_out = 0; | |
399 // Try last distance first. | |
400 for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) { | |
401 const size_t idx = kDistanceCacheIndex[i]; | |
402 const size_t backward = | |
403 static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]); | |
404 size_t prev_ix = static_cast<size_t>(cur_ix - backward); | |
405 if (prev_ix >= cur_ix) { | |
406 continue; | |
407 } | |
408 if (PREDICT_FALSE(backward > max_backward)) { | |
409 continue; | |
410 } | |
411 prev_ix &= ring_buffer_mask; | |
412 | |
413 if (cur_ix_masked + best_len > ring_buffer_mask || | |
414 prev_ix + best_len > ring_buffer_mask || | |
415 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { | |
416 continue; | |
417 } | |
418 const size_t len = FindMatchLengthWithLimit(&data[prev_ix], | |
419 &data[cur_ix_masked], | |
420 max_length); | |
421 if (len >= 3 || (len == 2 && i < 2)) { | |
422 // Comparing for >= 2 does not change the semantics, but just saves for | |
423 // a few unnecessary binary logarithms in backward reference score, | |
424 // since we are not interested in such short matches. | |
425 double score = BackwardReferenceScoreUsingLastDistance(len, i); | |
426 if (best_score < score) { | |
427 best_score = score; | |
428 best_len = len; | |
429 *best_len_out = best_len; | |
430 *best_len_code_out = best_len; | |
431 *best_distance_out = backward; | |
432 *best_score_out = best_score; | |
433 match_found = true; | |
434 } | |
435 } | |
436 } | |
437 const uint32_t key = HashBytes(&data[cur_ix_masked]); | |
438 const uint32_t * __restrict const bucket = &buckets_[key][0]; | |
439 const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0; | |
440 for (size_t i = num_[key]; i > down;) { | |
441 --i; | |
442 size_t prev_ix = bucket[i & kBlockMask]; | |
443 const size_t backward = cur_ix - prev_ix; | |
444 if (PREDICT_FALSE(backward == 0 || backward > max_backward)) { | |
445 break; | 390 break; |
446 } | 391 } |
447 prev_ix &= ring_buffer_mask; | 392 if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) { |
448 if (cur_ix_masked + best_len > ring_buffer_mask || | 393 best_len_left = len; |
449 prev_ix + best_len > ring_buffer_mask || | 394 if (should_reroot_tree) { |
450 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { | 395 self->forest_[node_left] = (uint32_t)prev_ix; |
451 continue; | 396 } |
| 397 node_left = FN(RightChildIndex)(self, prev_ix); |
| 398 prev_ix = self->forest_[node_left]; |
| 399 } else { |
| 400 best_len_right = len; |
| 401 if (should_reroot_tree) { |
| 402 self->forest_[node_right] = (uint32_t)prev_ix; |
| 403 } |
| 404 node_right = FN(LeftChildIndex)(self, prev_ix); |
| 405 prev_ix = self->forest_[node_right]; |
452 } | 406 } |
453 const size_t len = FindMatchLengthWithLimit(&data[prev_ix], | 407 } |
454 &data[cur_ix_masked], | 408 } |
455 max_length); | 409 return matches; |
456 if (len >= 4) { | 410 } |
457 // Comparing for >= 3 does not change the semantics, but just saves | 411 |
458 // for a few unnecessary binary logarithms in backward reference | 412 /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the |
459 // score, since we are not interested in such short matches. | 413 length of max_length and stores the position cur_ix in the hash table. |
460 double score = BackwardReferenceScore(len, backward); | 414 |
461 if (best_score < score) { | 415 Sets *num_matches to the number of matches found, and stores the found |
462 best_score = score; | 416 matches in matches[0] to matches[*num_matches - 1]. The matches will be |
463 best_len = len; | 417 sorted by strictly increasing length and (non-strictly) increasing |
464 *best_len_out = best_len; | 418 distance. */ |
465 *best_len_code_out = best_len; | 419 static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self, |
466 *best_distance_out = backward; | 420 const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix, |
467 *best_score_out = best_score; | 421 const size_t max_length, const size_t max_backward, |
468 match_found = true; | 422 const BrotliEncoderParams* params, BackwardMatch* matches) { |
469 } | 423 BackwardMatch* const orig_matches = matches; |
470 } | 424 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; |
471 } | 425 size_t best_len = 1; |
472 buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix); | 426 const size_t short_match_max_backward = |
473 ++num_[key]; | 427 params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64; |
474 if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) { | 428 size_t stop = cur_ix - short_match_max_backward; |
475 size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1; | 429 uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1]; |
476 for (int k = 0; k < 2; ++k, ++dict_key) { | 430 size_t i; |
477 ++num_dict_lookups_; | 431 if (cur_ix < short_match_max_backward) { stop = 0; } |
478 const uint16_t v = kStaticDictionaryHash[dict_key]; | 432 for (i = cur_ix - 1; i > stop && best_len <= 2; --i) { |
479 if (v > 0) { | 433 size_t prev_ix = i; |
480 const size_t len = v & 31; | 434 const size_t backward = cur_ix - prev_ix; |
481 const size_t dist = v >> 5; | 435 if (BROTLI_PREDICT_FALSE(backward > max_backward)) { |
482 const size_t offset = | 436 break; |
483 kBrotliDictionaryOffsetsByLength[len] + len * dist; | 437 } |
484 if (len <= max_length) { | 438 prev_ix &= ring_buffer_mask; |
485 const size_t matchlen = | 439 if (data[cur_ix_masked] != data[prev_ix] || |
486 FindMatchLengthWithLimit(&data[cur_ix_masked], | 440 data[cur_ix_masked + 1] != data[prev_ix + 1]) { |
487 &kBrotliDictionary[offset], len); | 441 continue; |
488 if (matchlen + kCutoffTransformsCount > len && matchlen > 0) { | 442 } |
489 const size_t transform_id = kCutoffTransforms[len - matchlen]; | 443 { |
490 const size_t word_id = | |
491 transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) + | |
492 dist; | |
493 const size_t backward = max_backward + word_id + 1; | |
494 double score = BackwardReferenceScore(matchlen, backward); | |
495 if (best_score < score) { | |
496 ++num_dict_matches_; | |
497 best_score = score; | |
498 best_len = matchlen; | |
499 *best_len_out = best_len; | |
500 *best_len_code_out = len; | |
501 *best_distance_out = backward; | |
502 *best_score_out = best_score; | |
503 match_found = true; | |
504 } | |
505 } | |
506 } | |
507 } | |
508 } | |
509 } | |
510 return match_found; | |
511 } | |
512 | |
513 // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the | |
514 // length of max_length and stores the position cur_ix in the hash table. | |
515 // | |
516 // Sets *num_matches to the number of matches found, and stores the found | |
517 // matches in matches[0] to matches[*num_matches - 1]. The matches will be | |
518 // sorted by strictly increasing length and (non-strictly) increasing | |
519 // distance. | |
520 size_t FindAllMatches(const uint8_t* data, | |
521 const size_t ring_buffer_mask, | |
522 const size_t cur_ix, | |
523 const size_t max_length, | |
524 const size_t max_backward, | |
525 BackwardMatch* matches) { | |
526 BackwardMatch* const orig_matches = matches; | |
527 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; | |
528 size_t best_len = 1; | |
529 size_t stop = cur_ix - 64; | |
530 if (cur_ix < 64) { stop = 0; } | |
531 for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) { | |
532 size_t prev_ix = i; | |
533 const size_t backward = cur_ix - prev_ix; | |
534 if (PREDICT_FALSE(backward > max_backward)) { | |
535 break; | |
536 } | |
537 prev_ix &= ring_buffer_mask; | |
538 if (data[cur_ix_masked] != data[prev_ix] || | |
539 data[cur_ix_masked + 1] != data[prev_ix + 1]) { | |
540 continue; | |
541 } | |
542 const size_t len = | 444 const size_t len = |
543 FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked], | 445 FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked], |
544 max_length); | 446 max_length); |
545 if (len > best_len) { | 447 if (len > best_len) { |
546 best_len = len; | 448 best_len = len; |
547 *matches++ = BackwardMatch(backward, len); | 449 InitBackwardMatch(matches++, backward, len); |
548 } | 450 } |
549 } | 451 } |
550 const uint32_t key = HashBytes(&data[cur_ix_masked]); | 452 } |
551 const uint32_t * __restrict const bucket = &buckets_[key][0]; | 453 if (best_len < max_length) { |
552 const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0; | 454 matches = FN(StoreAndFindMatches)(self, data, cur_ix, ring_buffer_mask, |
553 for (size_t i = num_[key]; i > down;) { | 455 max_length, max_backward, &best_len, matches); |
554 --i; | 456 } |
555 size_t prev_ix = bucket[i & kBlockMask]; | 457 for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) { |
556 const size_t backward = cur_ix - prev_ix; | 458 dict_matches[i] = kInvalidMatch; |
557 if (PREDICT_FALSE(backward == 0 || backward > max_backward)) { | 459 } |
558 break; | 460 { |
559 } | 461 size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1); |
560 prev_ix &= ring_buffer_mask; | 462 if (BrotliFindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, |
561 if (cur_ix_masked + best_len > ring_buffer_mask || | 463 max_length, &dict_matches[0])) { |
562 prev_ix + best_len > ring_buffer_mask || | 464 size_t maxlen = BROTLI_MIN( |
563 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { | 465 size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length); |
564 continue; | 466 size_t l; |
565 } | 467 for (l = minlen; l <= maxlen; ++l) { |
566 const size_t len = | |
567 FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked], | |
568 max_length); | |
569 if (len > best_len) { | |
570 best_len = len; | |
571 *matches++ = BackwardMatch(backward, len); | |
572 } | |
573 } | |
574 buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix); | |
575 ++num_[key]; | |
576 uint32_t dict_matches[kMaxDictionaryMatchLen + 1]; | |
577 for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) { | |
578 dict_matches[i] = kInvalidMatch; | |
579 } | |
580 size_t minlen = std::max<size_t>(4, best_len + 1); | |
581 if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length, | |
582 &dict_matches[0])) { | |
583 size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length); | |
584 for (size_t l = minlen; l <= maxlen; ++l) { | |
585 uint32_t dict_id = dict_matches[l]; | 468 uint32_t dict_id = dict_matches[l]; |
586 if (dict_id < kInvalidMatch) { | 469 if (dict_id < kInvalidMatch) { |
587 *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l, | 470 InitDictionaryBackwardMatch(matches++, |
588 dict_id & 31); | 471 max_backward + (dict_id >> 5) + 1, l, dict_id & 31); |
589 } | 472 } |
590 } | 473 } |
591 } | 474 } |
592 return static_cast<size_t>(matches - orig_matches); | 475 } |
593 } | 476 return (size_t)(matches - orig_matches); |
594 | 477 } |
595 enum { kHashLength = 4 }; | 478 |
596 enum { kHashTypeLength = 4 }; | 479 /* Stores the hash of the next 4 bytes and re-roots the binary tree at the |
597 | 480 current sequence, without returning any matches. |
598 // HashBytes is the function that chooses the bucket to place | 481 REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */ |
599 // the address in. The HashLongestMatch and HashLongestMatchQuickly | 482 static BROTLI_INLINE void FN(Store)(HashToBinaryTree* self, const uint8_t *data, |
600 // classes have separate, different implementations of hashing. | 483 const size_t mask, const size_t ix) { |
601 static uint32_t HashBytes(const uint8_t *data) { | 484 /* Maximum distance is window size - 16, see section 9.1. of the spec. */ |
602 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; | 485 const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1; |
603 // The higher bits contain more mixture from the multiplication, | 486 FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH, |
604 // so we take our results from there. | 487 max_backward, NULL, NULL); |
605 return h >> (32 - kBucketBits); | 488 } |
606 } | 489 |
607 | 490 static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self, |
608 enum { kHashMapSize = 2 << kBucketBits }; | 491 const uint8_t *data, const size_t mask, const size_t ix_start, |
609 | 492 const size_t ix_end) { |
610 static const size_t kMaxNumMatches = 64 + (1 << kBlockBits); | 493 size_t i = ix_start; |
611 | 494 size_t j = ix_start; |
612 private: | 495 if (ix_start + 63 <= ix_end) { |
613 // Number of hash buckets. | 496 i = ix_end - 63; |
614 static const uint32_t kBucketSize = 1 << kBucketBits; | 497 } |
615 | 498 if (ix_start + 512 <= i) { |
616 // Only kBlockSize newest backward references are kept, | 499 for (; j < i; j += 8) { |
617 // and the older are forgotten. | 500 FN(Store)(self, data, mask, j); |
618 static const uint32_t kBlockSize = 1 << kBlockBits; | 501 } |
619 | 502 } |
620 // Mask for accessing entries in a block (in a ringbuffer manner). | 503 for (; i < ix_end; ++i) { |
621 static const uint32_t kBlockMask = (1 << kBlockBits) - 1; | 504 FN(Store)(self, data, mask, i); |
622 | 505 } |
623 // Number of entries in a particular bucket. | 506 } |
624 uint16_t num_[kBucketSize]; | 507 |
625 | 508 static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashToBinaryTree* self, |
626 // Buckets containing kBlockSize of backward references. | 509 size_t num_bytes, size_t position, const uint8_t* ringbuffer, |
627 uint32_t buckets_[kBucketSize][kBlockSize]; | 510 size_t ringbuffer_mask) { |
628 | 511 if (num_bytes >= FN(HashTypeLength)() - 1 && |
629 // True if num_ array needs to be initialized. | 512 position >= MAX_TREE_COMP_LENGTH) { |
630 bool need_init_; | 513 /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher. |
631 | 514 These could not be calculated before, since they require knowledge |
632 size_t num_dict_lookups_; | 515 of both the previous and the current block. */ |
633 size_t num_dict_matches_; | 516 const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1; |
634 }; | 517 const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes); |
635 | 518 size_t i; |
636 // A (forgetful) hash table where each hash bucket contains a binary tree of | 519 for (i = i_start; i < i_end; ++i) { |
637 // sequences whose first 4 bytes share the same hash code. | 520 /* Maximum distance is window size - 16, see section 9.1. of the spec. |
638 // Each sequence is kMaxTreeCompLength long and is identified by its starting | 521 Furthermore, we have to make sure that we don't look further back |
639 // position in the input data. The binary tree is sorted by the lexicographic | 522 from the start of the next block than the window size, otherwise we |
640 // order of the sequences, and it is also a max-heap with respect to the | 523 could access already overwritten areas of the ring-buffer. */ |
641 // starting positions. | 524 const size_t max_backward = |
642 class HashToBinaryTree { | 525 self->window_mask_ - BROTLI_MAX(size_t, |
643 public: | 526 BROTLI_WINDOW_GAP - 1, |
644 HashToBinaryTree() : forest_(NULL) { | 527 position - i); |
645 Reset(); | 528 /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the |
646 } | 529 end of the current block and that we have at least |
647 | 530 MAX_TREE_COMP_LENGTH tail in the ring-buffer. */ |
648 ~HashToBinaryTree() { | 531 FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask, |
649 delete[] forest_; | 532 MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL); |
650 } | 533 } |
651 | 534 } |
652 void Reset() { | 535 } |
653 need_init_ = true; | 536 |
654 } | 537 #undef BUCKET_SIZE |
655 | 538 #undef BUCKET_BITS |
656 void Init(int lgwin, size_t position, size_t bytes, bool is_last) { | 539 |
657 if (need_init_) { | 540 #undef HASHER |
658 window_mask_ = (1u << lgwin) - 1u; | 541 |
659 invalid_pos_ = static_cast<uint32_t>(0 - window_mask_); | 542 /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression |
660 for (uint32_t i = 0; i < kBucketSize; i++) { | 543 a little faster (0.5% - 1%) and it compresses 0.15% better on small text |
661 buckets_[i] = invalid_pos_; | 544 and HTML inputs. */ |
662 } | 545 |
663 size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1; | 546 #define HASHER() H2 |
664 forest_ = new uint32_t[2 * num_nodes]; | 547 #define BUCKET_BITS 16 |
665 need_init_ = false; | 548 #define BUCKET_SWEEP 1 |
666 } | 549 #define USE_DICTIONARY 1 |
667 } | 550 #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ |
668 | 551 #undef BUCKET_SWEEP |
669 // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the | 552 #undef USE_DICTIONARY |
670 // length of max_length and stores the position cur_ix in the hash table. | 553 #undef HASHER |
671 // | 554 |
672 // Sets *num_matches to the number of matches found, and stores the found | 555 #define HASHER() H3 |
673 // matches in matches[0] to matches[*num_matches - 1]. The matches will be | 556 #define BUCKET_SWEEP 2 |
674 // sorted by strictly increasing length and (non-strictly) increasing | 557 #define USE_DICTIONARY 0 |
675 // distance. | 558 #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ |
676 size_t FindAllMatches(const uint8_t* data, | 559 #undef USE_DICTIONARY |
677 const size_t ring_buffer_mask, | 560 #undef BUCKET_SWEEP |
678 const size_t cur_ix, | 561 #undef BUCKET_BITS |
679 const size_t max_length, | 562 #undef HASHER |
680 const size_t max_backward, | 563 |
681 BackwardMatch* matches) { | 564 #define HASHER() H4 |
682 BackwardMatch* const orig_matches = matches; | 565 #define BUCKET_BITS 17 |
683 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; | 566 #define BUCKET_SWEEP 4 |
684 size_t best_len = 1; | 567 #define USE_DICTIONARY 1 |
685 size_t stop = cur_ix - 64; | 568 #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ |
686 if (cur_ix < 64) { stop = 0; } | 569 #undef USE_DICTIONARY |
687 for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) { | 570 #undef BUCKET_SWEEP |
688 size_t prev_ix = i; | 571 #undef BUCKET_BITS |
689 const size_t backward = cur_ix - prev_ix; | 572 #undef HASHER |
690 if (PREDICT_FALSE(backward > max_backward)) { | 573 |
691 break; | 574 #define HASHER() H5 |
692 } | 575 #define BUCKET_BITS 14 |
693 prev_ix &= ring_buffer_mask; | 576 #define BLOCK_BITS 4 |
694 if (data[cur_ix_masked] != data[prev_ix] || | 577 #define NUM_LAST_DISTANCES_TO_CHECK 4 |
695 data[cur_ix_masked + 1] != data[prev_ix + 1]) { | 578 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
696 continue; | 579 #undef BLOCK_BITS |
697 } | 580 #undef HASHER |
698 const size_t len = | 581 |
699 FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked], | 582 #define HASHER() H6 |
700 max_length); | 583 #define BLOCK_BITS 5 |
701 if (len > best_len) { | 584 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
702 best_len = len; | 585 #undef NUM_LAST_DISTANCES_TO_CHECK |
703 *matches++ = BackwardMatch(backward, len); | 586 #undef BLOCK_BITS |
704 } | 587 #undef BUCKET_BITS |
705 } | 588 #undef HASHER |
706 if (best_len < max_length) { | 589 |
707 matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask, | 590 #define HASHER() H7 |
708 max_length, &best_len, matches); | 591 #define BUCKET_BITS 15 |
709 } | 592 #define BLOCK_BITS 6 |
710 uint32_t dict_matches[kMaxDictionaryMatchLen + 1]; | 593 #define NUM_LAST_DISTANCES_TO_CHECK 10 |
711 for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) { | 594 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
712 dict_matches[i] = kInvalidMatch; | 595 #undef BLOCK_BITS |
713 } | 596 #undef HASHER |
714 size_t minlen = std::max<size_t>(4, best_len + 1); | 597 |
715 if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length, | 598 #define HASHER() H8 |
716 &dict_matches[0])) { | 599 #define BLOCK_BITS 7 |
717 size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length); | 600 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
718 for (size_t l = minlen; l <= maxlen; ++l) { | 601 #undef NUM_LAST_DISTANCES_TO_CHECK |
719 uint32_t dict_id = dict_matches[l]; | 602 #undef BLOCK_BITS |
720 if (dict_id < kInvalidMatch) { | 603 #undef HASHER |
721 *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l, | 604 |
722 dict_id & 31); | 605 #define HASHER() H9 |
723 } | 606 #define BLOCK_BITS 8 |
724 } | 607 #define NUM_LAST_DISTANCES_TO_CHECK 16 |
725 } | 608 #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ |
726 return static_cast<size_t>(matches - orig_matches); | 609 #undef NUM_LAST_DISTANCES_TO_CHECK |
727 } | 610 #undef BLOCK_BITS |
728 | 611 #undef BUCKET_BITS |
729 // Stores the hash of the next 4 bytes and re-roots the binary tree at the | 612 #undef HASHER |
730 // current sequence, without returning any matches. | 613 |
731 // REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block | 614 #define BUCKET_BITS 15 |
732 void Store(const uint8_t* data, | 615 |
733 const size_t ring_buffer_mask, | 616 #define NUM_LAST_DISTANCES_TO_CHECK 4 |
734 const size_t cur_ix) { | 617 #define NUM_BANKS 1 |
735 size_t best_len = 0; | 618 #define BANK_BITS 16 |
736 StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength, | 619 #define HASHER() H40 |
737 &best_len, NULL); | 620 #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ |
738 } | 621 #undef HASHER |
739 | 622 #undef NUM_LAST_DISTANCES_TO_CHECK |
740 void StitchToPreviousBlock(size_t num_bytes, | 623 |
741 size_t position, | 624 #define NUM_LAST_DISTANCES_TO_CHECK 10 |
742 const uint8_t* ringbuffer, | 625 #define HASHER() H41 |
743 size_t ringbuffer_mask) { | 626 #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ |
744 if (num_bytes >= 3 && position >= kMaxTreeCompLength) { | 627 #undef HASHER |
745 // Store the last `kMaxTreeCompLength - 1` positions in the hasher. | 628 #undef NUM_LAST_DISTANCES_TO_CHECK |
746 // These could not be calculated before, since they require knowledge | 629 #undef NUM_BANKS |
747 // of both the previous and the current block. | 630 #undef BANK_BITS |
748 const size_t i_start = position - kMaxTreeCompLength + 1; | 631 |
749 const size_t i_end = std::min(position, i_start + num_bytes); | 632 #define NUM_LAST_DISTANCES_TO_CHECK 16 |
750 for (size_t i = i_start; i < i_end; ++i) { | 633 #define NUM_BANKS 512 |
751 // We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the | 634 #define BANK_BITS 9 |
752 // end of the current block and that we have at least | 635 #define HASHER() H42 |
753 // kMaxTreeCompLength tail in the ringbuffer. | 636 #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ |
754 Store(ringbuffer, ringbuffer_mask, i); | 637 #undef HASHER |
755 } | 638 #undef NUM_LAST_DISTANCES_TO_CHECK |
756 } | 639 #undef NUM_BANKS |
757 } | 640 #undef BANK_BITS |
758 | 641 |
759 static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth; | 642 #undef BUCKET_BITS |
760 | 643 |
761 private: | 644 #undef FN |
762 // Stores the hash of the next 4 bytes and in a single tree-traversal, the | 645 #undef CAT |
763 // hash bucket's binary tree is searched for matches and is re-rooted at the | 646 #undef EXPAND_CAT |
764 // current position. | 647 |
765 // | 648 #define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(7) H(8) H(9) \ |
766 // If less than kMaxTreeCompLength data is available, the hash bucket of the | 649 H(40) H(41) H(42) |
767 // current position is searched for matches, but the state of the hash table | 650 #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10) |
768 // is not changed, since we can not know the final sorting order of the | 651 |
769 // current (incomplete) sequence. | 652 typedef struct Hashers { |
770 // | 653 #define MEMBER_(N) H ## N* h ## N; |
771 // This function must be called with increasing cur_ix positions. | 654 FOR_ALL_HASHERS(MEMBER_) |
772 BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data, | 655 #undef MEMBER_ |
773 const size_t cur_ix, | 656 } Hashers; |
774 const size_t ring_buffer_mask, | 657 |
775 const size_t max_length, | 658 static BROTLI_INLINE void InitHashers(Hashers* self) { |
776 size_t* const __restrict best_len, | 659 #define INIT_(N) self->h ## N = 0; |
777 BackwardMatch* __restrict matches) { | 660 FOR_ALL_HASHERS(INIT_) |
778 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; | 661 #undef INIT_ |
779 const size_t max_backward = window_mask_ - 15; | 662 } |
780 const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength); | 663 |
781 const bool reroot_tree = max_length >= kMaxTreeCompLength; | 664 static BROTLI_INLINE void DestroyHashers(MemoryManager* m, Hashers* self) { |
782 const uint32_t key = HashBytes(&data[cur_ix_masked]); | 665 if (self->h10) CleanupH10(m, self->h10); |
783 size_t prev_ix = buckets_[key]; | 666 #define CLEANUP_(N) BROTLI_FREE(m, self->h ## N) |
784 // The forest index of the rightmost node of the left subtree of the new | 667 FOR_ALL_HASHERS(CLEANUP_) |
785 // root, updated as we traverse and reroot the tree of the hash bucket. | 668 #undef CLEANUP_ |
786 size_t node_left = LeftChildIndex(cur_ix); | 669 } |
787 // The forest index of the leftmost node of the right subtree of the new | 670 |
788 // root, updated as we traverse and reroot the tree of the hash bucket. | 671 static BROTLI_INLINE void HashersReset(Hashers* self, int type) { |
789 size_t node_right = RightChildIndex(cur_ix); | 672 switch (type) { |
790 // The match length of the rightmost node of the left subtree of the new | 673 #define RESET_(N) case N: ResetH ## N(self->h ## N); break; |
791 // root, updated as we traverse and reroot the tree of the hash bucket. | 674 FOR_ALL_HASHERS(RESET_) |
792 size_t best_len_left = 0; | 675 #undef RESET_ |
793 // The match length of the leftmost node of the right subtree of the new | 676 default: break; |
794 // root, updated as we traverse and reroot the tree of the hash bucket. | 677 } |
795 size_t best_len_right = 0; | 678 } |
796 if (reroot_tree) { | 679 |
797 buckets_[key] = static_cast<uint32_t>(cur_ix); | 680 static BROTLI_INLINE void HashersSetup( |
798 } | 681 MemoryManager* m, Hashers* self, int type) { |
799 for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) { | 682 switch (type) { |
800 const size_t backward = cur_ix - prev_ix; | 683 #define SETUP_(N) case N: self->h ## N = BROTLI_ALLOC(m, H ## N, 1); break; |
801 const size_t prev_ix_masked = prev_ix & ring_buffer_mask; | 684 FOR_ALL_HASHERS(SETUP_) |
802 if (backward == 0 || backward > max_backward || depth_remaining == 0) { | 685 #undef SETUP_ |
803 if (reroot_tree) { | 686 default: break; |
804 forest_[node_left] = invalid_pos_; | 687 } |
805 forest_[node_right] = invalid_pos_; | 688 if (BROTLI_IS_OOM(m)) return; |
806 } | 689 if (type == 10) InitializeH10(self->h10); |
807 break; | 690 HashersReset(self, type); |
808 } | 691 } |
809 const size_t cur_len = std::min(best_len_left, best_len_right); | 692 |
810 const size_t len = cur_len + | 693 #define WARMUP_HASH_(N) \ |
811 FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len], | 694 static BROTLI_INLINE void WarmupHashH ## N(MemoryManager* m, \ |
812 &data[prev_ix_masked + cur_len], | 695 const BrotliEncoderParams* params, const size_t size, const uint8_t* dict, \ |
813 max_length - cur_len); | 696 H ## N* hasher) { \ |
814 if (len > *best_len) { | 697 size_t overlap = (StoreLookaheadH ## N()) - 1; \ |
815 *best_len = len; | 698 size_t i; \ |
816 if (matches) { | 699 InitH ## N(m, hasher, dict, params, 0, size, BROTLI_FALSE); \ |
817 *matches++ = BackwardMatch(backward, len); | 700 if (BROTLI_IS_OOM(m)) return; \ |
818 } | 701 for (i = 0; i + overlap < size; i++) { \ |
819 if (len >= max_comp_len) { | 702 StoreH ## N(hasher, dict, ~(size_t)0, i); \ |
820 if (reroot_tree) { | 703 } \ |
821 forest_[node_left] = forest_[LeftChildIndex(prev_ix)]; | 704 } |
822 forest_[node_right] = forest_[RightChildIndex(prev_ix)]; | 705 FOR_ALL_HASHERS(WARMUP_HASH_) |
823 } | 706 #undef WARMUP_HASH_ |
824 break; | 707 |
825 } | 708 /* Custom LZ77 window. */ |
826 } | 709 static BROTLI_INLINE void HashersPrependCustomDictionary( |
827 if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) { | 710 MemoryManager* m, Hashers* self, const BrotliEncoderParams* params, |
828 best_len_left = len; | 711 const size_t size, const uint8_t* dict) { |
829 if (reroot_tree) { | 712 int hasher_type = ChooseHasher(params); |
830 forest_[node_left] = static_cast<uint32_t>(prev_ix); | 713 switch (hasher_type) { |
831 } | 714 #define PREPEND_(N) \ |
832 node_left = RightChildIndex(prev_ix); | 715 case N: WarmupHashH ## N(m, params, size, dict, self->h ## N); break; |
833 prev_ix = forest_[node_left]; | 716 FOR_ALL_HASHERS(PREPEND_) |
834 } else { | 717 #undef PREPEND_ |
835 best_len_right = len; | 718 default: break; |
836 if (reroot_tree) { | 719 } |
837 forest_[node_right] = static_cast<uint32_t>(prev_ix); | 720 if (BROTLI_IS_OOM(m)) return; |
838 } | 721 } |
839 node_right = LeftChildIndex(prev_ix); | 722 |
840 prev_ix = forest_[node_right]; | 723 |
841 } | 724 #if defined(__cplusplus) || defined(c_plusplus) |
842 } | 725 } /* extern "C" */ |
843 return matches; | 726 #endif |
844 } | 727 |
845 | 728 #endif /* BROTLI_ENC_HASH_H_ */ |
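The C-style replacement (right column) exposes the hasher lifecycle through the macro-generated helpers it defines. The following is a minimal sketch of how a caller might drive that lifecycle, assuming a MemoryManager `m` and a populated BrotliEncoderParams `params` obtained elsewhere; the wrapper function name is illustrative and not part of the header:

static void ExampleHashersLifecycle(MemoryManager* m,
                                    const BrotliEncoderParams* params,
                                    size_t dict_size, const uint8_t* dict) {
  Hashers hashers;
  InitHashers(&hashers);                            /* zero every hasher pointer */
  HashersSetup(m, &hashers, ChooseHasher(params));  /* allocate + reset the chosen one */
  if (!BROTLI_IS_OOM(m) && dict_size != 0) {
    /* Warm the selected hasher up on a caller-supplied LZ77 dictionary. */
    HashersPrependCustomDictionary(m, &hashers, params, dict_size, dict);
  }
  /* ... the encoder would feed data through the selected hasher here ... */
  DestroyHashers(m, &hashers);                      /* frees whatever was allocated */
}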
846 inline size_t LeftChildIndex(const size_t pos) { | |
847 return 2 * (pos & window_mask_); | |
848 } | |
849 | |
850 inline size_t RightChildIndex(const size_t pos) { | |
851 return 2 * (pos & window_mask_) + 1; | |
852 } | |
853 | |
854 static uint32_t HashBytes(const uint8_t *data) { | |
855 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; | |
856 // The higher bits contain more mixture from the multiplication, | |
857 // so we take our results from there. | |
858 return h >> (32 - kBucketBits); | |
859 } | |
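The bucket-selection pattern in HashBytes is small enough to show in isolation. A self-contained sketch follows; the toy names and the particular odd multiplier are chosen for illustration and are not asserted to equal kHashMul32:

#include <stdint.h>
#include <string.h>

// Multiply the next 4 bytes by an odd constant and keep only the top 17 bits
// (kBucketBits above), where the multiplication has mixed the input the most.
static uint32_t ToyHashBytes(const uint8_t* data) {
  uint32_t v;
  memcpy(&v, data, sizeof(v));             // portable stand-in for the unaligned load
  const uint32_t kToyMul32 = 0x1e35a7bdu;  // odd multiplier, illustrative only
  return (v * kToyMul32) >> (32 - 17);     // bucket index in [0, 1 << 17)
}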
860 | |
861 static const int kBucketBits = 17; | |
862 static const size_t kBucketSize = 1 << kBucketBits; | |
863 | |
864 // The window size minus 1. |
865 size_t window_mask_; | |
866 | |
867 // Hash table that maps the 4-byte hashes of the sequence to the last | |
868 // position where this hash was found, which is the root of the binary | |
869 // tree of sequences that share this hash bucket. | |
870 uint32_t buckets_[kBucketSize]; | |
871 | |
872 // The union of the binary trees of each hash bucket. The root of the tree | |
873 // corresponding to a hash is a sequence starting at buckets_[hash] and | |
874 // the left and right children of a sequence starting at pos are | |
875 // forest_[2 * pos] and forest_[2 * pos + 1]. | |
876 uint32_t* forest_; | |
877 | |
878 // A position used to mark a non-existent sequence, i.e. a tree is empty if | |
879 // its root is at invalid_pos_ and a node is a leaf if both its children | |
880 // are at invalid_pos_. | |
881 uint32_t invalid_pos_; | |
882 | |
883 bool need_init_; | |
884 }; | |
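The buckets_/forest_ layout described in the member comments above can be illustrated with a small standalone structure. This is a sketch only: the names are local to the example, and the forest is sized as two slots per window position for simplicity.

#include <stdint.h>
#include <vector>

// Each window position owns two forest slots: its left child lives at
// forest[2 * (pos & mask)] and its right child at forest[2 * (pos & mask) + 1].
// A tree rooted at invalid_pos is empty; a node whose children are both
// invalid_pos is a leaf.
struct ToyTreeForest {
  size_t mask;                   // window size minus one
  uint32_t invalid_pos;          // marks "no node here"
  std::vector<uint32_t> forest;  // two entries per window position

  explicit ToyTreeForest(size_t window_size)
      : mask(window_size - 1),
        invalid_pos(0xffffffffu),
        forest(2 * window_size, 0xffffffffu) {}

  uint32_t& LeftChild(size_t pos) { return forest[2 * (pos & mask)]; }
  uint32_t& RightChild(size_t pos) { return forest[2 * (pos & mask) + 1]; }
  bool IsLeaf(size_t pos) {
    return LeftChild(pos) == invalid_pos && RightChild(pos) == invalid_pos;
  }
};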
885 | |
886 struct Hashers { | |
887 // For kBucketSweep == 1, enabling the dictionary lookup makes compression | |
888 // a little faster (0.5% - 1%) and it compresses 0.15% better on small text | |
889 // and html inputs. | |
890 typedef HashLongestMatchQuickly<16, 1, true> H2; | |
891 typedef HashLongestMatchQuickly<16, 2, false> H3; | |
892 typedef HashLongestMatchQuickly<17, 4, true> H4; | |
893 typedef HashLongestMatch<14, 4, 4> H5; | |
894 typedef HashLongestMatch<14, 5, 4> H6; | |
895 typedef HashLongestMatch<15, 6, 10> H7; | |
896 typedef HashLongestMatch<15, 7, 10> H8; | |
897 typedef HashLongestMatch<15, 8, 16> H9; | |
898 typedef HashToBinaryTree H10; | |
899 | |
900 Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0), | |
901 hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {} | |
902 | |
903 ~Hashers(void) { | |
904 delete hash_h2; | |
905 delete hash_h3; | |
906 delete hash_h4; | |
907 delete hash_h5; | |
908 delete hash_h6; | |
909 delete hash_h7; | |
910 delete hash_h8; | |
911 delete hash_h9; | |
912 delete hash_h10; | |
913 } | |
914 | |
915 void Init(int type) { | |
916 switch (type) { | |
917 case 2: hash_h2 = new H2; break; | |
918 case 3: hash_h3 = new H3; break; | |
919 case 4: hash_h4 = new H4; break; | |
920 case 5: hash_h5 = new H5; break; | |
921 case 6: hash_h6 = new H6; break; | |
922 case 7: hash_h7 = new H7; break; | |
923 case 8: hash_h8 = new H8; break; | |
924 case 9: hash_h9 = new H9; break; | |
925 case 10: hash_h10 = new H10; break; | |
926 default: break; | |
927 } | |
928 } | |
929 | |
930 template<typename Hasher> | |
931 void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) { | |
932 hasher->Init(); | |
933 for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) { | |
934 hasher->Store(&dict[i], static_cast<uint32_t>(i)); | |
935 } | |
936 } | |
937 | |
938 // Custom LZ77 window. | |
939 void PrependCustomDictionary( | |
940 int type, int lgwin, const size_t size, const uint8_t* dict) { | |
941 switch (type) { | |
942 case 2: WarmupHash(size, dict, hash_h2); break; | |
943 case 3: WarmupHash(size, dict, hash_h3); break; | |
944 case 4: WarmupHash(size, dict, hash_h4); break; | |
945 case 5: WarmupHash(size, dict, hash_h5); break; | |
946 case 6: WarmupHash(size, dict, hash_h6); break; | |
947 case 7: WarmupHash(size, dict, hash_h7); break; | |
948 case 8: WarmupHash(size, dict, hash_h8); break; | |
949 case 9: WarmupHash(size, dict, hash_h9); break; | |
950 case 10: | |
951 hash_h10->Init(lgwin, 0, size, false); | |
952 for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) { | |
953 hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i); | |
954 } | |
955 break; | |
956 default: break; | |
957 } | |
958 } | |
959 | |
960 | |
961 H2* hash_h2; | |
962 H3* hash_h3; | |
963 H4* hash_h4; | |
964 H5* hash_h5; | |
965 H6* hash_h6; | |
966 H7* hash_h7; | |
967 H8* hash_h8; | |
968 H9* hash_h9; | |
969 H10* hash_h10; | |
970 }; | |
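A minimal usage sketch for the C++ Hashers wrapper above: Init allocates only the hasher selected by `type`, and PrependCustomDictionary warms that hasher up on a caller-supplied dictionary. The wrapper function name below is illustrative and not part of this header:

static void SetupHashersWithDictionary(Hashers* hashers, int type, int lgwin,
                                       size_t dict_size, const uint8_t* dict) {
  hashers->Init(type);  // allocates hash_h<type>; all other members stay 0
  if (dict_size != 0) {
    hashers->PrependCustomDictionary(type, lgwin, dict_size, dict);
  }
  // The Hashers destructor deletes every member pointer; the ones that were
  // never allocated are still 0, so deleting them is a no-op.
}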
971 | |
972 } // namespace brotli | |
973 | |
974 #endif // BROTLI_ENC_HASH_H_ | |