OLD | NEW |
1 /* Copyright 2013 Google Inc. All Rights Reserved. | 1 /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | 2 |
3 Distributed under MIT license. | 3 Distributed under MIT license. |
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 */ | 5 */ |
6 | 6 |
7 #include "./static_dict.h" | 7 #include "./static_dict.h" |
8 | 8 |
9 #include <algorithm> | 9 #include "../common/dictionary.h" |
| 10 #include "./find_match_length.h" |
| 11 #include "./port.h" |
| 12 #include "./static_dict_lut.h" |
10 | 13 |
11 #include "./dictionary.h" | 14 #if defined(__cplusplus) || defined(c_plusplus) |
12 #include "./find_match_length.h" | 15 extern "C" { |
13 #include "./static_dict_lut.h" | 16 #endif |
14 #include "./transform.h" | |
15 | 17 |
16 namespace brotli { | 18 static const uint8_t kUppercaseFirst = 10; |
| 19 static const uint8_t kOmitLastNTransforms[10] = { |
| 20 0, 12, 27, 23, 42, 63, 56, 48, 59, 64, |
| 21 }; |
17 | 22 |
18 inline uint32_t Hash(const uint8_t *data) { | 23 static BROTLI_INLINE uint32_t Hash(const uint8_t *data) { |
19 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32; | 24 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32; |
20 // The higher bits contain more mixture from the multiplication, | 25 /* The higher bits contain more mixture from the multiplication, |
21 // so we take our results from there. | 26 so we take our results from there. */ |
22 return h >> (32 - kDictNumBits); | 27 return h >> (32 - kDictNumBits); |
23 } | 28 } |
24 | 29 |
25 inline void AddMatch(size_t distance, size_t len, size_t len_code, | 30 static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code, |
26 uint32_t* matches) { | 31 uint32_t* matches) { |
27 uint32_t match = static_cast<uint32_t>((distance << 5) + len_code); | 32 uint32_t match = (uint32_t)((distance << 5) + len_code); |
28 matches[len] = std::min(matches[len], match); | 33 matches[len] = BROTLI_MIN(uint32_t, matches[len], match); |
29 } | 34 } |
30 | 35 |
31 inline size_t DictMatchLength(const uint8_t* data, | 36 static BROTLI_INLINE size_t DictMatchLength(const uint8_t* data, |
32 size_t id, | 37 size_t id, |
33 size_t len, | 38 size_t len, |
34 size_t maxlen) { | 39 size_t maxlen) { |
35 const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id; | 40 const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id; |
36 return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data, | 41 return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data, |
37 std::min(len, maxlen)); | 42 BROTLI_MIN(size_t, len, maxlen)); |
38 } | 43 } |
39 | 44 |
40 inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) { | 45 static BROTLI_INLINE BROTLI_BOOL IsMatch( |
41 if (w.len > max_length) return false; | 46 DictWord w, const uint8_t* data, size_t max_length) { |
42 const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx; | 47 if (w.len > max_length) { |
43 const uint8_t* dict = &kBrotliDictionary[offset]; | 48 return BROTLI_FALSE; |
44 if (w.transform == 0) { | |
45 // Match against base dictionary word. | |
46 return FindMatchLengthWithLimit(dict, data, w.len) == w.len; | |
47 } else if (w.transform == 10) { | |
48 // Match against uppercase first transform. | |
49 // Note that there are only ASCII uppercase words in the lookup table. | |
50 return (dict[0] >= 'a' && dict[0] <= 'z' && | |
51 (dict[0] ^ 32) == data[0] && | |
52 FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) == | |
53 w.len - 1u); | |
54 } else { | 49 } else { |
55 // Match against uppercase all transform. | 50 const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + |
56 // Note that there are only ASCII uppercase words in the lookup table. | 51 (size_t)w.len * (size_t)w.idx; |
57 for (size_t i = 0; i < w.len; ++i) { | 52 const uint8_t* dict = &kBrotliDictionary[offset]; |
58 if (dict[i] >= 'a' && dict[i] <= 'z') { | 53 if (w.transform == 0) { |
59 if ((dict[i] ^ 32) != data[i]) return false; | 54 /* Match against base dictionary word. */ |
60 } else { | 55 return |
61 if (dict[i] != data[i]) return false; | 56 TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len); |
| 57 } else if (w.transform == 10) { |
| 58 /* Match against uppercase first transform. |
| 59 Note that there are only ASCII uppercase words in the lookup table. */ |
| 60 return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' && |
| 61 (dict[0] ^ 32) == data[0] && |
| 62 FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) == |
| 63 w.len - 1u); |
| 64 } else { |
| 65 /* Match against uppercase all transform. |
| 66 Note that there are only ASCII uppercase words in the lookup table. */ |
| 67 size_t i; |
| 68 for (i = 0; i < w.len; ++i) { |
| 69 if (dict[i] >= 'a' && dict[i] <= 'z') { |
| 70 if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE; |
| 71 } else { |
| 72 if (dict[i] != data[i]) return BROTLI_FALSE; |
| 73 } |
62 } | 74 } |
| 75 return BROTLI_TRUE; |
63 } | 76 } |
64 return true; | |
65 } | 77 } |
66 } | 78 } |
67 | 79 |
68 bool FindAllStaticDictionaryMatches(const uint8_t* data, | 80 BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( |
69 size_t min_length, | 81 const uint8_t* data, size_t min_length, size_t max_length, |
70 size_t max_length, | 82 uint32_t* matches) { |
71 uint32_t* matches) { | 83 BROTLI_BOOL has_found_match = BROTLI_FALSE; |
72 bool found_match = false; | 84 { |
73 size_t key = Hash(data); | 85 size_t offset = kStaticDictionaryBuckets[Hash(data)]; |
74 size_t bucket = kStaticDictionaryBuckets[key]; | 86 BROTLI_BOOL end = !offset; |
75 if (bucket != 0) { | 87 while (!end) { |
76 size_t num = bucket & 0xff; | 88 DictWord w = kStaticDictionaryWords[offset++]; |
77 size_t offset = bucket >> 8; | 89 const size_t l = w.len & 0x7F; |
78 for (size_t i = 0; i < num; ++i) { | 90 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l]; |
79 const DictWord w = kStaticDictionaryWords[offset + i]; | |
80 const size_t l = w.len; | |
81 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l]; | |
82 const size_t id = w.idx; | 91 const size_t id = w.idx; |
| 92 end = !!(w.len & 0x80); |
| 93 w.len = (uint8_t)l; |
83 if (w.transform == 0) { | 94 if (w.transform == 0) { |
84 const size_t matchlen = DictMatchLength(data, id, l, max_length); | 95 const size_t matchlen = DictMatchLength(data, id, l, max_length); |
85 // Transform "" + kIdentity + "" | 96 const uint8_t* s; |
| 97 size_t minlen; |
| 98 size_t maxlen; |
| 99 size_t len; |
| 100 /* Transform "" + kIdentity + "" */ |
86 if (matchlen == l) { | 101 if (matchlen == l) { |
87 AddMatch(id, l, l, matches); | 102 AddMatch(id, l, l, matches); |
88 found_match = true; | 103 has_found_match = BROTLI_TRUE; |
89 } | 104 } |
90 // Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " | 105 /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */ |
91 if (matchlen >= l - 1) { | 106 if (matchlen >= l - 1) { |
92 AddMatch(id + 12 * n, l - 1, l, matches); | 107 AddMatch(id + 12 * n, l - 1, l, matches); |
93 if (l + 2 < max_length && | 108 if (l + 2 < max_length && |
94 data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' && | 109 data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' && |
95 data[l + 2] == ' ') { | 110 data[l + 2] == ' ') { |
96 AddMatch(id + 49 * n, l + 3, l, matches); | 111 AddMatch(id + 49 * n, l + 3, l, matches); |
97 } | 112 } |
98 found_match = true; | 113 has_found_match = BROTLI_TRUE; |
99 } | 114 } |
100 // Transform "" + kOmitLastN + "" (N = 2 .. 9) | 115 /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */ |
101 size_t minlen = min_length; | 116 minlen = min_length; |
102 if (l > 9) minlen = std::max(minlen, l - 9); | 117 if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9); |
103 size_t maxlen = std::min(matchlen, l - 2); | 118 maxlen = BROTLI_MIN(size_t, matchlen, l - 2); |
104 for (size_t len = minlen; len <= maxlen; ++len) { | 119 for (len = minlen; len <= maxlen; ++len) { |
105 AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches); | 120 AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches); |
106 found_match = true; | 121 has_found_match = BROTLI_TRUE; |
107 } | 122 } |
108 if (matchlen < l || l + 6 >= max_length) { | 123 if (matchlen < l || l + 6 >= max_length) { |
109 continue; | 124 continue; |
110 } | 125 } |
111 const uint8_t* s = &data[l]; | 126 s = &data[l]; |
112 // Transforms "" + kIdentity + <suffix> | 127 /* Transforms "" + kIdentity + <suffix> */ |
113 if (s[0] == ' ') { | 128 if (s[0] == ' ') { |
114 AddMatch(id + n, l + 1, l, matches); | 129 AddMatch(id + n, l + 1, l, matches); |
115 if (s[1] == 'a') { | 130 if (s[1] == 'a') { |
116 if (s[2] == ' ') { | 131 if (s[2] == ' ') { |
117 AddMatch(id + 28 * n, l + 3, l, matches); | 132 AddMatch(id + 28 * n, l + 3, l, matches); |
118 } else if (s[2] == 's') { | 133 } else if (s[2] == 's') { |
119 if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches); | 134 if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches); |
120 } else if (s[2] == 't') { | 135 } else if (s[2] == 't') { |
121 if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches); | 136 if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches); |
122 } else if (s[2] == 'n') { | 137 } else if (s[2] == 'n') { |
123 if (s[3] == 'd' && s[4] == ' ') { | 138 if (s[3] == 'd' && s[4] == ' ') { |
124 AddMatch(id + 10 * n, l + 5, l, matches); | 139 AddMatch(id + 10 * n, l + 5, l, matches); |
125 } | 140 } |
126 } | 141 } |
127 } else if (s[1] == 'b') { | 142 } else if (s[1] == 'b') { |
128 if (s[2] == 'y' && s[3] == ' ') { | 143 if (s[2] == 'y' && s[3] == ' ') { |
129 AddMatch(id + 38 * n, l + 4, l, matches); | 144 AddMatch(id + 38 * n, l + 4, l, matches); |
130 } | 145 } |
131 } else if (s[1] == 'i') { | 146 } else if (s[1] == 'i') { |
132 if (s[2] == 'n') { | 147 if (s[2] == 'n') { |
133 if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches); | 148 if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches); |
134 } else if (s[2] == 's') { | 149 } else if (s[2] == 's') { |
135 if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches); | 150 if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches); |
136 } | 151 } |
137 } else if (s[1] == 'f') { | 152 } else if (s[1] == 'f') { |
138 if (s[2] == 'o') { | 153 if (s[2] == 'o') { |
139 if (s[3] == 'r' && s[4] == ' ') { | 154 if (s[3] == 'r' && s[4] == ' ') { |
140 AddMatch(id + 25 * n, l + 5, l, matches); | 155 AddMatch(id + 25 * n, l + 5, l, matches); |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
228 AddMatch(id + 95 * n, l + 4, l, matches); | 243 AddMatch(id + 95 * n, l + 4, l, matches); |
229 } | 244 } |
230 } | 245 } |
231 } else if (s[0] == 'f') { | 246 } else if (s[0] == 'f') { |
232 if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') { | 247 if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') { |
233 AddMatch(id + 90 * n, l + 4, l, matches); | 248 AddMatch(id + 90 * n, l + 4, l, matches); |
234 } | 249 } |
235 } else if (s[0] == 'i') { | 250 } else if (s[0] == 'i') { |
236 if (s[1] == 'v') { | 251 if (s[1] == 'v') { |
237 if (s[2] == 'e' && s[3] == ' ') { | 252 if (s[2] == 'e' && s[3] == ' ') { |
238 AddMatch(id + 92 * n, l + 4, l, matches); | 253 AddMatch(id + 92 * n, l + 4, l, matches); |
239 } | 254 } |
240 } else if (s[1] == 'z') { | 255 } else if (s[1] == 'z') { |
241 if (s[2] == 'e' && s[3] == ' ') { | 256 if (s[2] == 'e' && s[3] == ' ') { |
242 AddMatch(id + 100 * n, l + 4, l, matches); | 257 AddMatch(id + 100 * n, l + 4, l, matches); |
243 } | 258 } |
244 } | 259 } |
245 } else if (s[0] == 'l') { | 260 } else if (s[0] == 'l') { |
246 if (s[1] == 'e') { | 261 if (s[1] == 'e') { |
247 if (s[2] == 's' && s[3] == 's' && s[4] == ' ') { | 262 if (s[2] == 's' && s[3] == 's' && s[4] == ' ') { |
248 AddMatch(id + 93 * n, l + 5, l, matches); | 263 AddMatch(id + 93 * n, l + 5, l, matches); |
249 } | 264 } |
250 } else if (s[1] == 'y') { | 265 } else if (s[1] == 'y') { |
251 if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches); | 266 if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches); |
252 } | 267 } |
253 } else if (s[0] == 'o') { | 268 } else if (s[0] == 'o') { |
254 if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') { | 269 if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') { |
255 AddMatch(id + 106 * n, l + 4, l, matches); | 270 AddMatch(id + 106 * n, l + 4, l, matches); |
256 } | 271 } |
257 } | 272 } |
258 } else { | 273 } else { |
259 // Set t=false for kUppercaseFirst and | 274 /* Set is_all_caps=0 for kUppercaseFirst and |
260 // t=true otherwise (kUppercaseAll) transform. | 275 is_all_caps=1 otherwise (kUppercaseAll) transform. */ |
261 const bool t = w.transform != kUppercaseFirst; | 276 const BROTLI_BOOL is_all_caps = |
| 277 TO_BROTLI_BOOL(w.transform != kUppercaseFirst); |
| 278 const uint8_t* s; |
262 if (!IsMatch(w, data, max_length)) { | 279 if (!IsMatch(w, data, max_length)) { |
263 continue; | 280 continue; |
264 } | 281 } |
265 // Transform "" + kUppercase{First,All} + "" | 282 /* Transform "" + kUppercase{First,All} + "" */ |
266 AddMatch(id + (t ? 44 : 9) * n, l, l, matches); | 283 AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches); |
267 found_match = true; | 284 has_found_match = BROTLI_TRUE; |
268 if (l + 1 >= max_length) { | 285 if (l + 1 >= max_length) { |
269 continue; | 286 continue; |
270 } | 287 } |
271 // Transforms "" + kUppercase{First,All} + <suffix> | 288 /* Transforms "" + kUppercase{First,All} + <suffix> */ |
272 const uint8_t* s = &data[l]; | 289 s = &data[l]; |
273 if (s[0] == ' ') { | 290 if (s[0] == ' ') { |
274 AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches); | 291 AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches); |
275 } else if (s[0] == '"') { | 292 } else if (s[0] == '"') { |
276 AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches); | 293 AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches); |
277 if (s[1] == '>') { | 294 if (s[1] == '>') { |
278 AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches); | 295 AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches); |
279 } | 296 } |
280 } else if (s[0] == '.') { | 297 } else if (s[0] == '.') { |
281 AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches); | 298 AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches); |
282 if (s[1] == ' ') { | 299 if (s[1] == ' ') { |
283 AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches); | 300 AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches); |
284 } | 301 } |
285 } else if (s[0] == ',') { | 302 } else if (s[0] == ',') { |
286 AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches); | 303 AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches); |
287 if (s[1] == ' ') { | 304 if (s[1] == ' ') { |
288 AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches); | 305 AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches); |
289 } | 306 } |
290 } else if (s[0] == '\'') { | 307 } else if (s[0] == '\'') { |
291 AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches); | 308 AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches); |
292 } else if (s[0] == '(') { | 309 } else if (s[0] == '(') { |
293 AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches); | 310 AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches); |
294 } else if (s[0] == '=') { | 311 } else if (s[0] == '=') { |
295 if (s[1] == '"') { | 312 if (s[1] == '"') { |
296 AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches); | 313 AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches); |
297 } else if (s[1] == '\'') { | 314 } else if (s[1] == '\'') { |
298 AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches); | 315 AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches); |
299 } | 316 } |
300 } | 317 } |
301 } | 318 } |
302 } | 319 } |
303 } | 320 } |
304 // Transforms with prefixes " " and "." | 321 /* Transforms with prefixes " " and "." */ |
305 if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { | 322 if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { |
306 bool is_space = (data[0] == ' '); | 323 BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' '); |
307 key = Hash(&data[1]); | 324 size_t offset = kStaticDictionaryBuckets[Hash(&data[1])]; |
308 bucket = kStaticDictionaryBuckets[key]; | 325 BROTLI_BOOL end = !offset; |
309 size_t num = bucket & 0xff; | 326 while (!end) { |
310 size_t offset = bucket >> 8; | 327 DictWord w = kStaticDictionaryWords[offset++]; |
311 for (size_t i = 0; i < num; ++i) { | 328 const size_t l = w.len & 0x7F; |
312 const DictWord w = kStaticDictionaryWords[offset + i]; | 329 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l]; |
313 const size_t l = w.len; | |
314 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l]; | |
315 const size_t id = w.idx; | 330 const size_t id = w.idx; |
| 331 end = !!(w.len & 0x80); |
| 332 w.len = (uint8_t)l; |
316 if (w.transform == 0) { | 333 if (w.transform == 0) { |
| 334 const uint8_t* s; |
317 if (!IsMatch(w, &data[1], max_length - 1)) { | 335 if (!IsMatch(w, &data[1], max_length - 1)) { |
318 continue; | 336 continue; |
319 } | 337 } |
320 // Transforms " " + kIdentity + "" and "." + kIdentity + "" | 338 /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */ |
321 AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); | 339 AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); |
322 found_match = true; | 340 has_found_match = BROTLI_TRUE; |
323 if (l + 2 >= max_length) { | 341 if (l + 2 >= max_length) { |
324 continue; | 342 continue; |
325 } | 343 } |
326 // Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix> | 344 /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix> |
327 const uint8_t* s = &data[l + 1]; | 345 */ |
| 346 s = &data[l + 1]; |
328 if (s[0] == ' ') { | 347 if (s[0] == ' ') { |
329 AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches); | 348 AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches); |
330 } else if (s[0] == '(') { | 349 } else if (s[0] == '(') { |
331 AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches); | 350 AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches); |
332 } else if (is_space) { | 351 } else if (is_space) { |
333 if (s[0] == ',') { | 352 if (s[0] == ',') { |
334 AddMatch(id + 103 * n, l + 2, l, matches); | 353 AddMatch(id + 103 * n, l + 2, l, matches); |
335 if (s[1] == ' ') { | 354 if (s[1] == ' ') { |
336 AddMatch(id + 33 * n, l + 3, l, matches); | 355 AddMatch(id + 33 * n, l + 3, l, matches); |
337 } | 356 } |
338 } else if (s[0] == '.') { | 357 } else if (s[0] == '.') { |
339 AddMatch(id + 71 * n, l + 2, l, matches); | 358 AddMatch(id + 71 * n, l + 2, l, matches); |
340 if (s[1] == ' ') { | 359 if (s[1] == ' ') { |
341 AddMatch(id + 52 * n, l + 3, l, matches); | 360 AddMatch(id + 52 * n, l + 3, l, matches); |
342 } | 361 } |
343 } else if (s[0] == '=') { | 362 } else if (s[0] == '=') { |
344 if (s[1] == '"') { | 363 if (s[1] == '"') { |
345 AddMatch(id + 81 * n, l + 3, l, matches); | 364 AddMatch(id + 81 * n, l + 3, l, matches); |
346 } else if (s[1] == '\'') { | 365 } else if (s[1] == '\'') { |
347 AddMatch(id + 98 * n, l + 3, l, matches); | 366 AddMatch(id + 98 * n, l + 3, l, matches); |
348 } | 367 } |
349 } | 368 } |
350 } | 369 } |
351 } else if (is_space) { | 370 } else if (is_space) { |
352 // Set t=false for kUppercaseFirst and | 371 /* Set is_all_caps=0 for kUppercaseFirst and |
353 // t=true otherwise (kUppercaseAll) transform. | 372 is_all_caps=1 otherwise (kUppercaseAll) transform. */ |
354 const bool t = w.transform != kUppercaseFirst; | 373 const BROTLI_BOOL is_all_caps = |
| 374 TO_BROTLI_BOOL(w.transform != kUppercaseFirst); |
| 375 const uint8_t* s; |
355 if (!IsMatch(w, &data[1], max_length - 1)) { | 376 if (!IsMatch(w, &data[1], max_length - 1)) { |
356 continue; | 377 continue; |
357 } | 378 } |
358 // Transforms " " + kUppercase{First,All} + "" | 379 /* Transforms " " + kUppercase{First,All} + "" */ |
359 AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches); | 380 AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches); |
360 found_match = true; | 381 has_found_match = BROTLI_TRUE; |
361 if (l + 2 >= max_length) { | 382 if (l + 2 >= max_length) { |
362 continue; | 383 continue; |
363 } | 384 } |
364 // Transforms " " + kUppercase{First,All} + <suffix> | 385 /* Transforms " " + kUppercase{First,All} + <suffix> */ |
365 const uint8_t* s = &data[l + 1]; | 386 s = &data[l + 1]; |
366 if (s[0] == ' ') { | 387 if (s[0] == ' ') { |
367 AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches); | 388 AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches); |
368 } else if (s[0] == ',') { | 389 } else if (s[0] == ',') { |
369 if (!t) { | 390 if (!is_all_caps) { |
370 AddMatch(id + 109 * n, l + 2, l, matches); | 391 AddMatch(id + 109 * n, l + 2, l, matches); |
371 } | 392 } |
372 if (s[1] == ' ') { | 393 if (s[1] == ' ') { |
373 AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches); | 394 AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches); |
374 } | 395 } |
375 } else if (s[0] == '.') { | 396 } else if (s[0] == '.') { |
376 AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches); | 397 AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches); |
377 if (s[1] == ' ') { | 398 if (s[1] == ' ') { |
378 AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches); | 399 AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches); |
379 } | 400 } |
380 } else if (s[0] == '=') { | 401 } else if (s[0] == '=') { |
381 if (s[1] == '"') { | 402 if (s[1] == '"') { |
382 AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches); | 403 AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches); |
383 } else if (s[1] == '\'') { | 404 } else if (s[1] == '\'') { |
384 AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches); | 405 AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches); |
385 } | 406 } |
386 } | 407 } |
387 } | 408 } |
388 } | 409 } |
389 } | 410 } |
390 if (max_length >= 6) { | 411 if (max_length >= 6) { |
391 // Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" | 412 /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */ |
392 if ((data[1] == ' ' && | 413 if ((data[1] == ' ' && |
393 (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || | 414 (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || |
394 (data[0] == 0xc2 && data[1] == 0xa0)) { | 415 (data[0] == 0xc2 && data[1] == 0xa0)) { |
395 key = Hash(&data[2]); | 416 size_t offset = kStaticDictionaryBuckets[Hash(&data[2])]; |
396 bucket = kStaticDictionaryBuckets[key]; | 417 BROTLI_BOOL end = !offset; |
397 size_t num = bucket & 0xff; | 418 while (!end) { |
398 size_t offset = bucket >> 8; | 419 DictWord w = kStaticDictionaryWords[offset++]; |
399 for (size_t i = 0; i < num; ++i) { | 420 const size_t l = w.len & 0x7F; |
400 const DictWord w = kStaticDictionaryWords[offset + i]; | 421 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l]; |
401 const size_t l = w.len; | |
402 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l]; | |
403 const size_t id = w.idx; | 422 const size_t id = w.idx; |
| 423 end = !!(w.len & 0x80); |
| 424 w.len = (uint8_t)l; |
404 if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) { | 425 if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) { |
405 if (data[0] == 0xc2) { | 426 if (data[0] == 0xc2) { |
406 AddMatch(id + 102 * n, l + 2, l, matches); | 427 AddMatch(id + 102 * n, l + 2, l, matches); |
407 found_match = true; | 428 has_found_match = BROTLI_TRUE; |
408 } else if (l + 2 < max_length && data[l + 2] == ' ') { | 429 } else if (l + 2 < max_length && data[l + 2] == ' ') { |
409 size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13); | 430 size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13); |
410 AddMatch(id + t * n, l + 3, l, matches); | 431 AddMatch(id + t * n, l + 3, l, matches); |
411 found_match = true; | 432 has_found_match = BROTLI_TRUE; |
412 } | 433 } |
413 } | 434 } |
414 } | 435 } |
415 } | 436 } |
416 } | 437 } |
417 if (max_length >= 9) { | 438 if (max_length >= 9) { |
418 // Transforms with prefixes " the " and ".com/" | 439 /* Transforms with prefixes " the " and ".com/" */ |
419 if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' && | 440 if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' && |
420 data[3] == 'e' && data[4] == ' ') || | 441 data[3] == 'e' && data[4] == ' ') || |
421 (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && | 442 (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && |
422 data[3] == 'm' && data[4] == '/')) { | 443 data[3] == 'm' && data[4] == '/')) { |
423 key = Hash(&data[5]); | 444 size_t offset = kStaticDictionaryBuckets[Hash(&data[5])]; |
424 bucket = kStaticDictionaryBuckets[key]; | 445 BROTLI_BOOL end = !offset; |
425 size_t num = bucket & 0xff; | 446 while (!end) { |
426 size_t offset = bucket >> 8; | 447 DictWord w = kStaticDictionaryWords[offset++]; |
427 for (size_t i = 0; i < num; ++i) { | 448 const size_t l = w.len & 0x7F; |
428 const DictWord w = kStaticDictionaryWords[offset + i]; | 449 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l]; |
429 const size_t l = w.len; | |
430 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l]; | |
431 const size_t id = w.idx; | 450 const size_t id = w.idx; |
| 451 end = !!(w.len & 0x80); |
| 452 w.len = (uint8_t)l; |
432 if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) { | 453 if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) { |
433 AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); | 454 AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); |
434 found_match = true; | 455 has_found_match = BROTLI_TRUE; |
435 if (l + 5 < max_length) { | 456 if (l + 5 < max_length) { |
436 const uint8_t* s = &data[l + 5]; | 457 const uint8_t* s = &data[l + 5]; |
437 if (data[0] == ' ') { | 458 if (data[0] == ' ') { |
438 if (l + 8 < max_length && | 459 if (l + 8 < max_length && |
439 s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') { | 460 s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') { |
440 AddMatch(id + 62 * n, l + 9, l, matches); | 461 AddMatch(id + 62 * n, l + 9, l, matches); |
441 if (l + 12 < max_length && | 462 if (l + 12 < max_length && |
442 s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') { | 463 s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') { |
443 AddMatch(id + 73 * n, l + 13, l, matches); | 464 AddMatch(id + 73 * n, l + 13, l, matches); |
444 } | 465 } |
445 } | 466 } |
446 } | 467 } |
447 } | 468 } |
448 } | 469 } |
449 } | 470 } |
450 } | 471 } |
451 } | 472 } |
452 return found_match; | 473 return has_found_match; |
453 } | 474 } |
454 | 475 |
455 } // namespace brotli | 476 #if defined(__cplusplus) || defined(c_plusplus) |
| 477 } /* extern "C" */ |
| 478 #endif |
OLD | NEW |