| OLD | NEW |
| 1 /* Copyright 2013 Google Inc. All Rights Reserved. | 1 /* Copyright 2013 Google Inc. All Rights Reserved. |
| 2 | 2 |
| 3 Distributed under MIT license. | 3 Distributed under MIT license. |
| 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| 5 */ | 5 */ |
| 6 | 6 |
| 7 #include "./static_dict.h" | 7 #include "./static_dict.h" |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include "../common/dictionary.h" |
| 10 #include "./find_match_length.h" |
| 11 #include "./port.h" |
| 12 #include "./static_dict_lut.h" |
| 10 | 13 |
| 11 #include "./dictionary.h" | 14 #if defined(__cplusplus) || defined(c_plusplus) |
| 12 #include "./find_match_length.h" | 15 extern "C" { |
| 13 #include "./static_dict_lut.h" | 16 #endif |
| 14 #include "./transform.h" | |
| 15 | 17 |
| 16 namespace brotli { | 18 static const uint8_t kUppercaseFirst = 10; |
| 19 static const uint8_t kOmitLastNTransforms[10] = { |
| 20 0, 12, 27, 23, 42, 63, 56, 48, 59, 64, |
| 21 }; |
| 17 | 22 |
| 18 inline uint32_t Hash(const uint8_t *data) { | 23 static BROTLI_INLINE uint32_t Hash(const uint8_t *data) { |
| 19 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32; | 24 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32; |
| 20 // The higher bits contain more mixture from the multiplication, | 25 /* The higher bits contain more mixture from the multiplication, |
| 21 // so we take our results from there. | 26 so we take our results from there. */ |
| 22 return h >> (32 - kDictNumBits); | 27 return h >> (32 - kDictNumBits); |
| 23 } | 28 } |
| 24 | 29 |
| 25 inline void AddMatch(size_t distance, size_t len, size_t len_code, | 30 static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code, |
| 26 uint32_t* matches) { | 31 uint32_t* matches) { |
| 27 uint32_t match = static_cast<uint32_t>((distance << 5) + len_code); | 32 uint32_t match = (uint32_t)((distance << 5) + len_code); |
| 28 matches[len] = std::min(matches[len], match); | 33 matches[len] = BROTLI_MIN(uint32_t, matches[len], match); |
| 29 } | 34 } |
| 30 | 35 |
| 31 inline size_t DictMatchLength(const uint8_t* data, | 36 static BROTLI_INLINE size_t DictMatchLength(const uint8_t* data, |
| 32 size_t id, | 37 size_t id, |
| 33 size_t len, | 38 size_t len, |
| 34 size_t maxlen) { | 39 size_t maxlen) { |
| 35 const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id; | 40 const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id; |
| 36 return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data, | 41 return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data, |
| 37 std::min(len, maxlen)); | 42 BROTLI_MIN(size_t, len, maxlen)); |
| 38 } | 43 } |
| 39 | 44 |
| 40 inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) { | 45 static BROTLI_INLINE BROTLI_BOOL IsMatch( |
| 41 if (w.len > max_length) return false; | 46 DictWord w, const uint8_t* data, size_t max_length) { |
| 42 const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx; | 47 if (w.len > max_length) { |
| 43 const uint8_t* dict = &kBrotliDictionary[offset]; | 48 return BROTLI_FALSE; |
| 44 if (w.transform == 0) { | |
| 45 // Match against base dictionary word. | |
| 46 return FindMatchLengthWithLimit(dict, data, w.len) == w.len; | |
| 47 } else if (w.transform == 10) { | |
| 48 // Match against uppercase first transform. | |
| 49 // Note that there are only ASCII uppercase words in the lookup table. | |
| 50 return (dict[0] >= 'a' && dict[0] <= 'z' && | |
| 51 (dict[0] ^ 32) == data[0] && | |
| 52 FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) == | |
| 53 w.len - 1u); | |
| 54 } else { | 49 } else { |
| 55 // Match against uppercase all transform. | 50 const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + |
| 56 // Note that there are only ASCII uppercase words in the lookup table. | 51 (size_t)w.len * (size_t)w.idx; |
| 57 for (size_t i = 0; i < w.len; ++i) { | 52 const uint8_t* dict = &kBrotliDictionary[offset]; |
| 58 if (dict[i] >= 'a' && dict[i] <= 'z') { | 53 if (w.transform == 0) { |
| 59 if ((dict[i] ^ 32) != data[i]) return false; | 54 /* Match against base dictionary word. */ |
| 60 } else { | 55 return |
| 61 if (dict[i] != data[i]) return false; | 56 TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len); |
| 57 } else if (w.transform == 10) { |
| 58 /* Match against uppercase first transform. |
| 59 Note that there are only ASCII uppercase words in the lookup table. */ |
| 60 return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' && |
| 61 (dict[0] ^ 32) == data[0] && |
| 62 FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) == |
| 63 w.len - 1u); |
| 64 } else { |
| 65 /* Match against uppercase all transform. |
| 66 Note that there are only ASCII uppercase words in the lookup table. */ |
| 67 size_t i; |
| 68 for (i = 0; i < w.len; ++i) { |
| 69 if (dict[i] >= 'a' && dict[i] <= 'z') { |
| 70 if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE; |
| 71 } else { |
| 72 if (dict[i] != data[i]) return BROTLI_FALSE; |
| 73 } |
| 62 } | 74 } |
| 75 return BROTLI_TRUE; |
| 63 } | 76 } |
| 64 return true; | |
| 65 } | 77 } |
| 66 } | 78 } |
| 67 | 79 |
| 68 bool FindAllStaticDictionaryMatches(const uint8_t* data, | 80 BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( |
| 69 size_t min_length, | 81 const uint8_t* data, size_t min_length, size_t max_length, |
| 70 size_t max_length, | 82 uint32_t* matches) { |
| 71 uint32_t* matches) { | 83 BROTLI_BOOL has_found_match = BROTLI_FALSE; |
| 72 bool found_match = false; | 84 { |
| 73 size_t key = Hash(data); | 85 size_t offset = kStaticDictionaryBuckets[Hash(data)]; |
| 74 size_t bucket = kStaticDictionaryBuckets[key]; | 86 BROTLI_BOOL end = !offset; |
| 75 if (bucket != 0) { | 87 while (!end) { |
| 76 size_t num = bucket & 0xff; | 88 DictWord w = kStaticDictionaryWords[offset++]; |
| 77 size_t offset = bucket >> 8; | 89 const size_t l = w.len & 0x7F; |
| 78 for (size_t i = 0; i < num; ++i) { | 90 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l]; |
| 79 const DictWord w = kStaticDictionaryWords[offset + i]; | |
| 80 const size_t l = w.len; | |
| 81 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l]; | |
| 82 const size_t id = w.idx; | 91 const size_t id = w.idx; |
| 92 end = !!(w.len & 0x80); |
| 93 w.len = (uint8_t)l; |
| 83 if (w.transform == 0) { | 94 if (w.transform == 0) { |
| 84 const size_t matchlen = DictMatchLength(data, id, l, max_length); | 95 const size_t matchlen = DictMatchLength(data, id, l, max_length); |
| 85 // Transform "" + kIdentity + "" | 96 const uint8_t* s; |
| 97 size_t minlen; |
| 98 size_t maxlen; |
| 99 size_t len; |
| 100 /* Transform "" + kIdentity + "" */ |
| 86 if (matchlen == l) { | 101 if (matchlen == l) { |
| 87 AddMatch(id, l, l, matches); | 102 AddMatch(id, l, l, matches); |
| 88 found_match = true; | 103 has_found_match = BROTLI_TRUE; |
| 89 } | 104 } |
| 90 // Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " | 105 /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */ |
| 91 if (matchlen >= l - 1) { | 106 if (matchlen >= l - 1) { |
| 92 AddMatch(id + 12 * n, l - 1, l, matches); | 107 AddMatch(id + 12 * n, l - 1, l, matches); |
| 93 if (l + 2 < max_length && | 108 if (l + 2 < max_length && |
| 94 data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' && | 109 data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' && |
| 95 data[l + 2] == ' ') { | 110 data[l + 2] == ' ') { |
| 96 AddMatch(id + 49 * n, l + 3, l, matches); | 111 AddMatch(id + 49 * n, l + 3, l, matches); |
| 97 } | 112 } |
| 98 found_match = true; | 113 has_found_match = BROTLI_TRUE; |
| 99 } | 114 } |
| 100 // Transform "" + kOmitLastN + "" (N = 2 .. 9) | 115 /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */ |
| 101 size_t minlen = min_length; | 116 minlen = min_length; |
| 102 if (l > 9) minlen = std::max(minlen, l - 9); | 117 if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9); |
| 103 size_t maxlen = std::min(matchlen, l - 2); | 118 maxlen = BROTLI_MIN(size_t, matchlen, l - 2); |
| 104 for (size_t len = minlen; len <= maxlen; ++len) { | 119 for (len = minlen; len <= maxlen; ++len) { |
| 105 AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches); | 120 AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches); |
| 106 found_match = true; | 121 has_found_match = BROTLI_TRUE; |
| 107 } | 122 } |
| 108 if (matchlen < l || l + 6 >= max_length) { | 123 if (matchlen < l || l + 6 >= max_length) { |
| 109 continue; | 124 continue; |
| 110 } | 125 } |
| 111 const uint8_t* s = &data[l]; | 126 s = &data[l]; |
| 112 // Transforms "" + kIdentity + <suffix> | 127 /* Transforms "" + kIdentity + <suffix> */ |
| 113 if (s[0] == ' ') { | 128 if (s[0] == ' ') { |
| 114 AddMatch(id + n, l + 1, l, matches); | 129 AddMatch(id + n, l + 1, l, matches); |
| 115 if (s[1] == 'a') { | 130 if (s[1] == 'a') { |
| 116 if (s[2] == ' ') { | 131 if (s[2] == ' ') { |
| 117 AddMatch(id + 28 * n, l + 3, l, matches); | 132 AddMatch(id + 28 * n, l + 3, l, matches); |
| 118 } else if (s[2] == 's') { | 133 } else if (s[2] == 's') { |
| 119 if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches); | 134 if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches); |
| 120 } else if (s[2] == 't') { | 135 } else if (s[2] == 't') { |
| 121 if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches); | 136 if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches); |
| 122 } else if (s[2] == 'n') { | 137 } else if (s[2] == 'n') { |
| 123 if (s[3] == 'd' && s[4] == ' ') { | 138 if (s[3] == 'd' && s[4] == ' ') { |
| 124 AddMatch(id + 10 * n, l + 5, l, matches); | 139 AddMatch(id + 10 * n, l + 5, l, matches); |
| 125 } | 140 } |
| 126 } | 141 } |
| 127 } else if (s[1] == 'b') { | 142 } else if (s[1] == 'b') { |
| 128 if (s[2] == 'y' && s[3] == ' ') { | 143 if (s[2] == 'y' && s[3] == ' ') { |
| 129 AddMatch(id + 38 * n, l + 4, l, matches); | 144 AddMatch(id + 38 * n, l + 4, l, matches); |
| 130 } | 145 } |
| 131 } else if (s[1] == 'i') { | 146 } else if (s[1] == 'i') { |
| 132 if (s[2] == 'n') { | 147 if (s[2] == 'n') { |
| 133 if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches); | 148 if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches); |
| 134 } else if (s[2] == 's') { | 149 } else if (s[2] == 's') { |
| 135 if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches); | 150 if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches); |
| 136 } | 151 } |
| 137 } else if (s[1] == 'f') { | 152 } else if (s[1] == 'f') { |
| 138 if (s[2] == 'o') { | 153 if (s[2] == 'o') { |
| 139 if (s[3] == 'r' && s[4] == ' ') { | 154 if (s[3] == 'r' && s[4] == ' ') { |
| 140 AddMatch(id + 25 * n, l + 5, l, matches); | 155 AddMatch(id + 25 * n, l + 5, l, matches); |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 228 AddMatch(id + 95 * n, l + 4, l, matches); | 243 AddMatch(id + 95 * n, l + 4, l, matches); |
| 229 } | 244 } |
| 230 } | 245 } |
| 231 } else if (s[0] == 'f') { | 246 } else if (s[0] == 'f') { |
| 232 if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') { | 247 if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') { |
| 233 AddMatch(id + 90 * n, l + 4, l, matches); | 248 AddMatch(id + 90 * n, l + 4, l, matches); |
| 234 } | 249 } |
| 235 } else if (s[0] == 'i') { | 250 } else if (s[0] == 'i') { |
| 236 if (s[1] == 'v') { | 251 if (s[1] == 'v') { |
| 237 if (s[2] == 'e' && s[3] == ' ') { | 252 if (s[2] == 'e' && s[3] == ' ') { |
| 238 AddMatch(id + 92 * n, l + 4, l, matches); | 253 AddMatch(id + 92 * n, l + 4, l, matches); |
| 239 } | 254 } |
| 240 } else if (s[1] == 'z') { | 255 } else if (s[1] == 'z') { |
| 241 if (s[2] == 'e' && s[3] == ' ') { | 256 if (s[2] == 'e' && s[3] == ' ') { |
| 242 AddMatch(id + 100 * n, l + 4, l, matches); | 257 AddMatch(id + 100 * n, l + 4, l, matches); |
| 243 } | 258 } |
| 244 } | 259 } |
| 245 } else if (s[0] == 'l') { | 260 } else if (s[0] == 'l') { |
| 246 if (s[1] == 'e') { | 261 if (s[1] == 'e') { |
| 247 if (s[2] == 's' && s[3] == 's' && s[4] == ' ') { | 262 if (s[2] == 's' && s[3] == 's' && s[4] == ' ') { |
| 248 AddMatch(id + 93 * n, l + 5, l, matches); | 263 AddMatch(id + 93 * n, l + 5, l, matches); |
| 249 } | 264 } |
| 250 } else if (s[1] == 'y') { | 265 } else if (s[1] == 'y') { |
| 251 if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches); | 266 if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches); |
| 252 } | 267 } |
| 253 } else if (s[0] == 'o') { | 268 } else if (s[0] == 'o') { |
| 254 if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') { | 269 if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') { |
| 255 AddMatch(id + 106 * n, l + 4, l, matches); | 270 AddMatch(id + 106 * n, l + 4, l, matches); |
| 256 } | 271 } |
| 257 } | 272 } |
| 258 } else { | 273 } else { |
| 259 // Set t=false for kUppercaseFirst and | 274 /* Set is_all_caps=0 for kUppercaseFirst and |
| 260 // t=true otherwise (kUppercaseAll) transform. | 275 is_all_caps=1 otherwise (kUppercaseAll) transform. */ |
| 261 const bool t = w.transform != kUppercaseFirst; | 276 const BROTLI_BOOL is_all_caps = |
| 277 TO_BROTLI_BOOL(w.transform != kUppercaseFirst); |
| 278 const uint8_t* s; |
| 262 if (!IsMatch(w, data, max_length)) { | 279 if (!IsMatch(w, data, max_length)) { |
| 263 continue; | 280 continue; |
| 264 } | 281 } |
| 265 // Transform "" + kUppercase{First,All} + "" | 282 /* Transform "" + kUppercase{First,All} + "" */ |
| 266 AddMatch(id + (t ? 44 : 9) * n, l, l, matches); | 283 AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches); |
| 267 found_match = true; | 284 has_found_match = BROTLI_TRUE; |
| 268 if (l + 1 >= max_length) { | 285 if (l + 1 >= max_length) { |
| 269 continue; | 286 continue; |
| 270 } | 287 } |
| 271 // Transforms "" + kUppercase{First,All} + <suffix> | 288 /* Transforms "" + kUppercase{First,All} + <suffix> */ |
| 272 const uint8_t* s = &data[l]; | 289 s = &data[l]; |
| 273 if (s[0] == ' ') { | 290 if (s[0] == ' ') { |
| 274 AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches); | 291 AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches); |
| 275 } else if (s[0] == '"') { | 292 } else if (s[0] == '"') { |
| 276 AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches); | 293 AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches); |
| 277 if (s[1] == '>') { | 294 if (s[1] == '>') { |
| 278 AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches); | 295 AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches); |
| 279 } | 296 } |
| 280 } else if (s[0] == '.') { | 297 } else if (s[0] == '.') { |
| 281 AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches); | 298 AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches); |
| 282 if (s[1] == ' ') { | 299 if (s[1] == ' ') { |
| 283 AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches); | 300 AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches); |
| 284 } | 301 } |
| 285 } else if (s[0] == ',') { | 302 } else if (s[0] == ',') { |
| 286 AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches); | 303 AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches); |
| 287 if (s[1] == ' ') { | 304 if (s[1] == ' ') { |
| 288 AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches); | 305 AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches); |
| 289 } | 306 } |
| 290 } else if (s[0] == '\'') { | 307 } else if (s[0] == '\'') { |
| 291 AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches); | 308 AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches); |
| 292 } else if (s[0] == '(') { | 309 } else if (s[0] == '(') { |
| 293 AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches); | 310 AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches); |
| 294 } else if (s[0] == '=') { | 311 } else if (s[0] == '=') { |
| 295 if (s[1] == '"') { | 312 if (s[1] == '"') { |
| 296 AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches); | 313 AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches); |
| 297 } else if (s[1] == '\'') { | 314 } else if (s[1] == '\'') { |
| 298 AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches); | 315 AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches); |
| 299 } | 316 } |
| 300 } | 317 } |
| 301 } | 318 } |
| 302 } | 319 } |
| 303 } | 320 } |
| 304 // Transforms with prefixes " " and "." | 321 /* Transforms with prefixes " " and "." */ |
| 305 if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { | 322 if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { |
| 306 bool is_space = (data[0] == ' '); | 323 BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' '); |
| 307 key = Hash(&data[1]); | 324 size_t offset = kStaticDictionaryBuckets[Hash(&data[1])]; |
| 308 bucket = kStaticDictionaryBuckets[key]; | 325 BROTLI_BOOL end = !offset; |
| 309 size_t num = bucket & 0xff; | 326 while (!end) { |
| 310 size_t offset = bucket >> 8; | 327 DictWord w = kStaticDictionaryWords[offset++]; |
| 311 for (size_t i = 0; i < num; ++i) { | 328 const size_t l = w.len & 0x7F; |
| 312 const DictWord w = kStaticDictionaryWords[offset + i]; | 329 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l]; |
| 313 const size_t l = w.len; | |
| 314 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l]; | |
| 315 const size_t id = w.idx; | 330 const size_t id = w.idx; |
| 331 end = !!(w.len & 0x80); |
| 332 w.len = (uint8_t)l; |
| 316 if (w.transform == 0) { | 333 if (w.transform == 0) { |
| 334 const uint8_t* s; |
| 317 if (!IsMatch(w, &data[1], max_length - 1)) { | 335 if (!IsMatch(w, &data[1], max_length - 1)) { |
| 318 continue; | 336 continue; |
| 319 } | 337 } |
| 320 // Transforms " " + kIdentity + "" and "." + kIdentity + "" | 338 /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */ |
| 321 AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); | 339 AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); |
| 322 found_match = true; | 340 has_found_match = BROTLI_TRUE; |
| 323 if (l + 2 >= max_length) { | 341 if (l + 2 >= max_length) { |
| 324 continue; | 342 continue; |
| 325 } | 343 } |
| 326 // Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix> | 344 /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix> |
| 327 const uint8_t* s = &data[l + 1]; | 345 */ |
| 346 s = &data[l + 1]; |
| 328 if (s[0] == ' ') { | 347 if (s[0] == ' ') { |
| 329 AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches); | 348 AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches); |
| 330 } else if (s[0] == '(') { | 349 } else if (s[0] == '(') { |
| 331 AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches); | 350 AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches); |
| 332 } else if (is_space) { | 351 } else if (is_space) { |
| 333 if (s[0] == ',') { | 352 if (s[0] == ',') { |
| 334 AddMatch(id + 103 * n, l + 2, l, matches); | 353 AddMatch(id + 103 * n, l + 2, l, matches); |
| 335 if (s[1] == ' ') { | 354 if (s[1] == ' ') { |
| 336 AddMatch(id + 33 * n, l + 3, l, matches); | 355 AddMatch(id + 33 * n, l + 3, l, matches); |
| 337 } | 356 } |
| 338 } else if (s[0] == '.') { | 357 } else if (s[0] == '.') { |
| 339 AddMatch(id + 71 * n, l + 2, l, matches); | 358 AddMatch(id + 71 * n, l + 2, l, matches); |
| 340 if (s[1] == ' ') { | 359 if (s[1] == ' ') { |
| 341 AddMatch(id + 52 * n, l + 3, l, matches); | 360 AddMatch(id + 52 * n, l + 3, l, matches); |
| 342 } | 361 } |
| 343 } else if (s[0] == '=') { | 362 } else if (s[0] == '=') { |
| 344 if (s[1] == '"') { | 363 if (s[1] == '"') { |
| 345 AddMatch(id + 81 * n, l + 3, l, matches); | 364 AddMatch(id + 81 * n, l + 3, l, matches); |
| 346 } else if (s[1] == '\'') { | 365 } else if (s[1] == '\'') { |
| 347 AddMatch(id + 98 * n, l + 3, l, matches); | 366 AddMatch(id + 98 * n, l + 3, l, matches); |
| 348 } | 367 } |
| 349 } | 368 } |
| 350 } | 369 } |
| 351 } else if (is_space) { | 370 } else if (is_space) { |
| 352 // Set t=false for kUppercaseFirst and | 371 /* Set is_all_caps=0 for kUppercaseFirst and |
| 353 // t=true otherwise (kUppercaseAll) transform. | 372 is_all_caps=1 otherwise (kUppercaseAll) transform. */ |
| 354 const bool t = w.transform != kUppercaseFirst; | 373 const BROTLI_BOOL is_all_caps = |
| 374 TO_BROTLI_BOOL(w.transform != kUppercaseFirst); |
| 375 const uint8_t* s; |
| 355 if (!IsMatch(w, &data[1], max_length - 1)) { | 376 if (!IsMatch(w, &data[1], max_length - 1)) { |
| 356 continue; | 377 continue; |
| 357 } | 378 } |
| 358 // Transforms " " + kUppercase{First,All} + "" | 379 /* Transforms " " + kUppercase{First,All} + "" */ |
| 359 AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches); | 380 AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches); |
| 360 found_match = true; | 381 has_found_match = BROTLI_TRUE; |
| 361 if (l + 2 >= max_length) { | 382 if (l + 2 >= max_length) { |
| 362 continue; | 383 continue; |
| 363 } | 384 } |
| 364 // Transforms " " + kUppercase{First,All} + <suffix> | 385 /* Transforms " " + kUppercase{First,All} + <suffix> */ |
| 365 const uint8_t* s = &data[l + 1]; | 386 s = &data[l + 1]; |
| 366 if (s[0] == ' ') { | 387 if (s[0] == ' ') { |
| 367 AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches); | 388 AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches); |
| 368 } else if (s[0] == ',') { | 389 } else if (s[0] == ',') { |
| 369 if (!t) { | 390 if (!is_all_caps) { |
| 370 AddMatch(id + 109 * n, l + 2, l, matches); | 391 AddMatch(id + 109 * n, l + 2, l, matches); |
| 371 } | 392 } |
| 372 if (s[1] == ' ') { | 393 if (s[1] == ' ') { |
| 373 AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches); | 394 AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches); |
| 374 } | 395 } |
| 375 } else if (s[0] == '.') { | 396 } else if (s[0] == '.') { |
| 376 AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches); | 397 AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches); |
| 377 if (s[1] == ' ') { | 398 if (s[1] == ' ') { |
| 378 AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches); | 399 AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches); |
| 379 } | 400 } |
| 380 } else if (s[0] == '=') { | 401 } else if (s[0] == '=') { |
| 381 if (s[1] == '"') { | 402 if (s[1] == '"') { |
| 382 AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches); | 403 AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches); |
| 383 } else if (s[1] == '\'') { | 404 } else if (s[1] == '\'') { |
| 384 AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches); | 405 AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches); |
| 385 } | 406 } |
| 386 } | 407 } |
| 387 } | 408 } |
| 388 } | 409 } |
| 389 } | 410 } |
| 390 if (max_length >= 6) { | 411 if (max_length >= 6) { |
| 391 // Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" | 412 /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */ |
| 392 if ((data[1] == ' ' && | 413 if ((data[1] == ' ' && |
| 393 (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || | 414 (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || |
| 394 (data[0] == 0xc2 && data[1] == 0xa0)) { | 415 (data[0] == 0xc2 && data[1] == 0xa0)) { |
| 395 key = Hash(&data[2]); | 416 size_t offset = kStaticDictionaryBuckets[Hash(&data[2])]; |
| 396 bucket = kStaticDictionaryBuckets[key]; | 417 BROTLI_BOOL end = !offset; |
| 397 size_t num = bucket & 0xff; | 418 while (!end) { |
| 398 size_t offset = bucket >> 8; | 419 DictWord w = kStaticDictionaryWords[offset++]; |
| 399 for (size_t i = 0; i < num; ++i) { | 420 const size_t l = w.len & 0x7F; |
| 400 const DictWord w = kStaticDictionaryWords[offset + i]; | 421 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l]; |
| 401 const size_t l = w.len; | |
| 402 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l]; | |
| 403 const size_t id = w.idx; | 422 const size_t id = w.idx; |
| 423 end = !!(w.len & 0x80); |
| 424 w.len = (uint8_t)l; |
| 404 if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) { | 425 if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) { |
| 405 if (data[0] == 0xc2) { | 426 if (data[0] == 0xc2) { |
| 406 AddMatch(id + 102 * n, l + 2, l, matches); | 427 AddMatch(id + 102 * n, l + 2, l, matches); |
| 407 found_match = true; | 428 has_found_match = BROTLI_TRUE; |
| 408 } else if (l + 2 < max_length && data[l + 2] == ' ') { | 429 } else if (l + 2 < max_length && data[l + 2] == ' ') { |
| 409 size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13); | 430 size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13); |
| 410 AddMatch(id + t * n, l + 3, l, matches); | 431 AddMatch(id + t * n, l + 3, l, matches); |
| 411 found_match = true; | 432 has_found_match = BROTLI_TRUE; |
| 412 } | 433 } |
| 413 } | 434 } |
| 414 } | 435 } |
| 415 } | 436 } |
| 416 } | 437 } |
| 417 if (max_length >= 9) { | 438 if (max_length >= 9) { |
| 418 // Transforms with prefixes " the " and ".com/" | 439 /* Transforms with prefixes " the " and ".com/" */ |
| 419 if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' && | 440 if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' && |
| 420 data[3] == 'e' && data[4] == ' ') || | 441 data[3] == 'e' && data[4] == ' ') || |
| 421 (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && | 442 (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && |
| 422 data[3] == 'm' && data[4] == '/')) { | 443 data[3] == 'm' && data[4] == '/')) { |
| 423 key = Hash(&data[5]); | 444 size_t offset = kStaticDictionaryBuckets[Hash(&data[5])]; |
| 424 bucket = kStaticDictionaryBuckets[key]; | 445 BROTLI_BOOL end = !offset; |
| 425 size_t num = bucket & 0xff; | 446 while (!end) { |
| 426 size_t offset = bucket >> 8; | 447 DictWord w = kStaticDictionaryWords[offset++]; |
| 427 for (size_t i = 0; i < num; ++i) { | 448 const size_t l = w.len & 0x7F; |
| 428 const DictWord w = kStaticDictionaryWords[offset + i]; | 449 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l]; |
| 429 const size_t l = w.len; | |
| 430 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l]; | |
| 431 const size_t id = w.idx; | 450 const size_t id = w.idx; |
| 451 end = !!(w.len & 0x80); |
| 452 w.len = (uint8_t)l; |
| 432 if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) { | 453 if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) { |
| 433 AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); | 454 AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); |
| 434 found_match = true; | 455 has_found_match = BROTLI_TRUE; |
| 435 if (l + 5 < max_length) { | 456 if (l + 5 < max_length) { |
| 436 const uint8_t* s = &data[l + 5]; | 457 const uint8_t* s = &data[l + 5]; |
| 437 if (data[0] == ' ') { | 458 if (data[0] == ' ') { |
| 438 if (l + 8 < max_length && | 459 if (l + 8 < max_length && |
| 439 s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') { | 460 s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') { |
| 440 AddMatch(id + 62 * n, l + 9, l, matches); | 461 AddMatch(id + 62 * n, l + 9, l, matches); |
| 441 if (l + 12 < max_length && | 462 if (l + 12 < max_length && |
| 442 s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') { | 463 s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') { |
| 443 AddMatch(id + 73 * n, l + 13, l, matches); | 464 AddMatch(id + 73 * n, l + 13, l, matches); |
| 444 } | 465 } |
| 445 } | 466 } |
| 446 } | 467 } |
| 447 } | 468 } |
| 448 } | 469 } |
| 449 } | 470 } |
| 450 } | 471 } |
| 451 } | 472 } |
| 452 return found_match; | 473 return has_found_match; |
| 453 } | 474 } |
| 454 | 475 |
| 455 } // namespace brotli | 476 #if defined(__cplusplus) || defined(c_plusplus) |
| 477 } /* extern "C" */ |
| 478 #endif |
| OLD | NEW |