OLD | NEW |
(Empty) | |
| 1 /* NOLINT(build/header_guard) */ |
| 2 /* Copyright 2010 Google Inc. All Rights Reserved. |
| 3 |
| 4 Distributed under MIT license. |
| 5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| 6 */ |
| 7 |
| 8 /* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP, USE_DICTIONARY */ |
| 9 |
#define HashLongestMatchQuickly HASHER()

/* Number of hash buckets: the BUCKET_BITS-bit hash indexes directly into
   this range. */
#define BUCKET_SIZE (1 << BUCKET_BITS)

/* Scaled size used only to pick the initialization strategy in FN(Init). */
#define HASH_MAP_SIZE (4 << BUCKET_BITS)
/* Number of bytes read when computing a hash: FN(HashBytes) does one
   64-bit unaligned load. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
/* How many bytes past a stored position may be read when hashing it. */
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
| 18 |
| 19 /* HashBytes is the function that chooses the bucket to place |
| 20 the address in. The HashLongestMatch and HashLongestMatchQuickly |
| 21 classes have separate, different implementations of hashing. */ |
| 22 static uint32_t FN(HashBytes)(const uint8_t *data) { |
| 23 /* Computing a hash based on 5 bytes works much better for |
| 24 qualities 1 and 3, where the next hash value is likely to replace */ |
| 25 uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32; |
| 26 /* The higher bits contain more mixture from the multiplication, |
| 27 so we take our results from there. */ |
| 28 return (uint32_t)(h >> (64 - BUCKET_BITS)); |
| 29 } |
| 30 |
/* A (forgetful) hash table to the data seen by the compressor, to
   help create backward references to previous data.

   This is a hash map of fixed size (BUCKET_SIZE). Starting from the
   given index, BUCKET_SWEEP buckets are used to store values of a key. */
typedef struct HashLongestMatchQuickly {
  /* Ring-buffer positions keyed by hash; BUCKET_SWEEP extra slots keep a
     sweep that starts at the last key inside the array. */
  uint32_t buckets_[BUCKET_SIZE + BUCKET_SWEEP];
  /* True if buckets_ array needs to be initialized. */
  BROTLI_BOOL is_dirty_;
  /* Statistics for static-dictionary lookups; NOTE(review): the
     "Statictics" spelling matches the type declared elsewhere in the
     project (see DictionarySearchStaticticsReset) — keep as is. */
  DictionarySearchStatictics dict_search_stats_;
} HashLongestMatchQuickly;
| 42 |
| 43 static void FN(Reset)(HashLongestMatchQuickly* self) { |
| 44 self->is_dirty_ = BROTLI_TRUE; |
| 45 DictionarySearchStaticticsReset(&self->dict_search_stats_); |
| 46 } |
| 47 |
| 48 static void FN(InitEmpty)(HashLongestMatchQuickly* self) { |
| 49 if (self->is_dirty_) { |
| 50 /* It is not strictly necessary to fill this buffer here, but |
| 51 not filling will make the results of the compression stochastic |
| 52 (but correct). This is because random data would cause the |
| 53 system to find accidentally good backward references here and there. */ |
| 54 memset(&self->buckets_[0], 0, sizeof(self->buckets_)); |
| 55 self->is_dirty_ = BROTLI_FALSE; |
| 56 } |
| 57 } |
| 58 |
| 59 static void FN(InitForData)(HashLongestMatchQuickly* self, const uint8_t* data, |
| 60 size_t num) { |
| 61 size_t i; |
| 62 for (i = 0; i < num; ++i) { |
| 63 const uint32_t key = FN(HashBytes)(&data[i]); |
| 64 memset(&self->buckets_[key], 0, BUCKET_SWEEP * sizeof(self->buckets_[0])); |
| 65 } |
| 66 if (num != 0) { |
| 67 self->is_dirty_ = BROTLI_FALSE; |
| 68 } |
| 69 } |
| 70 |
| 71 static void FN(Init)( |
| 72 MemoryManager* m, HashLongestMatchQuickly* self, const uint8_t* data, |
| 73 const BrotliEncoderParams* params, size_t position, size_t bytes, |
| 74 BROTLI_BOOL is_last) { |
| 75 /* Choose which initialization method is faster. |
| 76 Init() is about 100 times faster than InitForData(). */ |
| 77 const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7; |
| 78 BROTLI_UNUSED(m); |
| 79 BROTLI_UNUSED(params); |
| 80 if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) { |
| 81 FN(InitForData)(self, data, bytes); |
| 82 } else { |
| 83 FN(InitEmpty)(self); |
| 84 } |
| 85 } |
| 86 |
| 87 /* Look at 5 bytes at &data[ix & mask]. |
| 88 Compute a hash from these, and store the value somewhere within |
| 89 [ix .. ix+3]. */ |
| 90 static BROTLI_INLINE void FN(Store)(HashLongestMatchQuickly* self, |
| 91 const uint8_t *data, const size_t mask, const size_t ix) { |
| 92 const uint32_t key = FN(HashBytes)(&data[ix & mask]); |
| 93 /* Wiggle the value with the bucket sweep range. */ |
| 94 const uint32_t off = (ix >> 3) % BUCKET_SWEEP; |
| 95 self->buckets_[key + off] = (uint32_t)ix; |
| 96 } |
| 97 |
| 98 static BROTLI_INLINE void FN(StoreRange)(HashLongestMatchQuickly* self, |
| 99 const uint8_t *data, const size_t mask, const size_t ix_start, |
| 100 const size_t ix_end) { |
| 101 size_t i; |
| 102 for (i = ix_start; i < ix_end; ++i) { |
| 103 FN(Store)(self, data, mask, i); |
| 104 } |
| 105 } |
| 106 |
| 107 static BROTLI_INLINE void FN(StitchToPreviousBlock)( |
| 108 HashLongestMatchQuickly* self, size_t num_bytes, size_t position, |
| 109 const uint8_t* ringbuffer, size_t ringbuffer_mask) { |
| 110 if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) { |
| 111 /* Prepare the hashes for three last bytes of the last write. |
| 112 These could not be calculated before, since they require knowledge |
| 113 of both the previous and the current block. */ |
| 114 FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3); |
| 115 FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2); |
| 116 FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1); |
| 117 } |
| 118 } |
| 119 |
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
   up to the length of max_length and stores the position cur_ix in the
   hash table.

   Does not look for matches longer than max_length.
   Does not look for matches further away than max_backward.
   Writes the best match into |out|.
   Returns true if match is found, otherwise false. */
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
    HashLongestMatchQuickly* self, const uint8_t* BROTLI_RESTRICT data,
    const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
    const size_t cur_ix, const size_t max_length, const size_t max_backward,
    HasherSearchResult* BROTLI_RESTRICT out) {
  const size_t best_len_in = out->len;
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
  const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
  /* Byte just past the incoming best match: a candidate is only worth a
     full length comparison if it also matches this byte. */
  int compare_char = data[cur_ix_masked + best_len_in];
  score_t best_score = out->score;
  size_t best_len = best_len_in;
  /* First try the most recently used distance (distance_cache[0]). */
  size_t cached_backward = (size_t)distance_cache[0];
  size_t prev_ix = cur_ix - cached_backward;
  BROTLI_BOOL is_match_found = BROTLI_FALSE;
  out->len_x_code = 0;
  /* A cached distance larger than cur_ix wraps prev_ix (size_t), making
     prev_ix >= cur_ix, so this check also guards against underflow. */
  if (prev_ix < cur_ix) {
    prev_ix &= (uint32_t)ring_buffer_mask;
    if (compare_char == data[prev_ix + best_len]) {
      size_t len = FindMatchLengthWithLimit(&data[prev_ix],
                                            &data[cur_ix_masked],
                                            max_length);
      if (len >= 4) {
        /* Matches at the cached distance are scored with the
           last-distance bonus (distance code 0). */
        best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
        best_len = len;
        out->len = len;
        out->distance = cached_backward;
        out->score = best_score;
        compare_char = data[cur_ix_masked + best_len];
        if (BUCKET_SWEEP == 1) {
          /* Single-slot table: store cur_ix now and accept this match. */
          self->buckets_[key] = (uint32_t)cur_ix;
          return BROTLI_TRUE;
        } else {
          is_match_found = BROTLI_TRUE;
        }
      }
    }
  }
  if (BUCKET_SWEEP == 1) {
    size_t backward;
    size_t len;
    /* Only one to look for, don't bother to prepare for a loop. */
    prev_ix = self->buckets_[key];
    self->buckets_[key] = (uint32_t)cur_ix;
    backward = cur_ix - prev_ix;
    prev_ix &= (uint32_t)ring_buffer_mask;
    if (compare_char != data[prev_ix + best_len_in]) {
      return BROTLI_FALSE;
    }
    /* backward == 0 means the bucket held cur_ix itself (self-match). */
    if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
      return BROTLI_FALSE;
    }
    len = FindMatchLengthWithLimit(&data[prev_ix],
                                   &data[cur_ix_masked],
                                   max_length);
    if (len >= 4) {
      out->len = len;
      out->distance = backward;
      out->score = BackwardReferenceScore(len, backward);
      return BROTLI_TRUE;
    }
  } else {
    /* Scan all BUCKET_SWEEP candidate slots for this key. */
    uint32_t *bucket = self->buckets_ + key;
    int i;
    prev_ix = *bucket++;
    for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) {
      const size_t backward = cur_ix - prev_ix;
      size_t len;
      prev_ix &= (uint32_t)ring_buffer_mask;
      /* Cheap reject: the candidate must extend past the current best. */
      if (compare_char != data[prev_ix + best_len]) {
        continue;
      }
      if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
        continue;
      }
      len = FindMatchLengthWithLimit(&data[prev_ix],
                                     &data[cur_ix_masked],
                                     max_length);
      if (len >= 4) {
        const score_t score = BackwardReferenceScore(len, backward);
        if (best_score < score) {
          best_score = score;
          best_len = len;
          out->len = best_len;
          out->distance = backward;
          out->score = score;
          compare_char = data[cur_ix_masked + best_len];
          is_match_found = BROTLI_TRUE;
        }
      }
    }
  }
  if (USE_DICTIONARY && !is_match_found) {
    /* No hash-table match: fall back to the static dictionary. */
    is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,
        &data[cur_ix_masked], max_length, max_backward, out, BROTLI_TRUE);
  }
  /* Record the current position in its sweep slot (same scheme as
     FN(Store)); for BUCKET_SWEEP == 1 this slot may already hold cur_ix. */
  self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
  return is_match_found;
}
| 226 |
| 227 #undef HASH_MAP_SIZE |
| 228 #undef BUCKET_SIZE |
| 229 |
| 230 #undef HashLongestMatchQuickly |
OLD | NEW |