Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(733)

Side by Side Diff: third_party/brotli/enc/static_dict.c

Issue 2537133002: Update brotli to v1.0.0-snapshot. (Closed)
Patch Set: Fixed typo Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/brotli/enc/static_dict.h ('k') | third_party/brotli/enc/static_dict.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright 2013 Google Inc. All Rights Reserved. 1 /* Copyright 2013 Google Inc. All Rights Reserved.
2 2
3 Distributed under MIT license. 3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */ 5 */
6 6
7 #include "./static_dict.h" 7 #include "./static_dict.h"
8 8
9 #include <algorithm> 9 #include "../common/dictionary.h"
10 #include "./find_match_length.h"
11 #include "./port.h"
12 #include "./static_dict_lut.h"
10 13
11 #include "./dictionary.h" 14 #if defined(__cplusplus) || defined(c_plusplus)
12 #include "./find_match_length.h" 15 extern "C" {
13 #include "./static_dict_lut.h" 16 #endif
14 #include "./transform.h"
15 17
16 namespace brotli { 18 static const uint8_t kUppercaseFirst = 10;
19 static const uint8_t kOmitLastNTransforms[10] = {
20 0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
21 };
17 22
18 inline uint32_t Hash(const uint8_t *data) { 23 static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
19 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32; 24 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
20 // The higher bits contain more mixture from the multiplication, 25 /* The higher bits contain more mixture from the multiplication,
21 // so we take our results from there. 26 so we take our results from there. */
22 return h >> (32 - kDictNumBits); 27 return h >> (32 - kDictNumBits);
23 } 28 }
24 29
25 inline void AddMatch(size_t distance, size_t len, size_t len_code, 30 static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
26 uint32_t* matches) { 31 uint32_t* matches) {
27 uint32_t match = static_cast<uint32_t>((distance << 5) + len_code); 32 uint32_t match = (uint32_t)((distance << 5) + len_code);
28 matches[len] = std::min(matches[len], match); 33 matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
29 } 34 }
30 35
31 inline size_t DictMatchLength(const uint8_t* data, 36 static BROTLI_INLINE size_t DictMatchLength(const uint8_t* data,
32 size_t id, 37 size_t id,
33 size_t len, 38 size_t len,
34 size_t maxlen) { 39 size_t maxlen) {
35 const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id; 40 const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
36 return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data, 41 return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
37 std::min(len, maxlen)); 42 BROTLI_MIN(size_t, len, maxlen));
38 } 43 }
39 44
40 inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) { 45 static BROTLI_INLINE BROTLI_BOOL IsMatch(
41 if (w.len > max_length) return false; 46 DictWord w, const uint8_t* data, size_t max_length) {
42 const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx; 47 if (w.len > max_length) {
43 const uint8_t* dict = &kBrotliDictionary[offset]; 48 return BROTLI_FALSE;
44 if (w.transform == 0) {
45 // Match against base dictionary word.
46 return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
47 } else if (w.transform == 10) {
48 // Match against uppercase first transform.
49 // Note that there are only ASCII uppercase words in the lookup table.
50 return (dict[0] >= 'a' && dict[0] <= 'z' &&
51 (dict[0] ^ 32) == data[0] &&
52 FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
53 w.len - 1u);
54 } else { 49 } else {
55 // Match against uppercase all transform. 50 const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] +
56 // Note that there are only ASCII uppercase words in the lookup table. 51 (size_t)w.len * (size_t)w.idx;
57 for (size_t i = 0; i < w.len; ++i) { 52 const uint8_t* dict = &kBrotliDictionary[offset];
58 if (dict[i] >= 'a' && dict[i] <= 'z') { 53 if (w.transform == 0) {
59 if ((dict[i] ^ 32) != data[i]) return false; 54 /* Match against base dictionary word. */
60 } else { 55 return
61 if (dict[i] != data[i]) return false; 56 TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);
57 } else if (w.transform == 10) {
58 /* Match against uppercase first transform.
59 Note that there are only ASCII uppercase words in the lookup table. */
60 return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&
61 (dict[0] ^ 32) == data[0] &&
62 FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
63 w.len - 1u);
64 } else {
65 /* Match against uppercase all transform.
66 Note that there are only ASCII uppercase words in the lookup table. */
67 size_t i;
68 for (i = 0; i < w.len; ++i) {
69 if (dict[i] >= 'a' && dict[i] <= 'z') {
70 if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;
71 } else {
72 if (dict[i] != data[i]) return BROTLI_FALSE;
73 }
62 } 74 }
75 return BROTLI_TRUE;
63 } 76 }
64 return true;
65 } 77 }
66 } 78 }
67 79
68 bool FindAllStaticDictionaryMatches(const uint8_t* data, 80 BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
69 size_t min_length, 81 const uint8_t* data, size_t min_length, size_t max_length,
70 size_t max_length, 82 uint32_t* matches) {
71 uint32_t* matches) { 83 BROTLI_BOOL has_found_match = BROTLI_FALSE;
72 bool found_match = false; 84 {
73 size_t key = Hash(data); 85 size_t offset = kStaticDictionaryBuckets[Hash(data)];
74 size_t bucket = kStaticDictionaryBuckets[key]; 86 BROTLI_BOOL end = !offset;
75 if (bucket != 0) { 87 while (!end) {
76 size_t num = bucket & 0xff; 88 DictWord w = kStaticDictionaryWords[offset++];
77 size_t offset = bucket >> 8; 89 const size_t l = w.len & 0x7F;
78 for (size_t i = 0; i < num; ++i) { 90 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
79 const DictWord w = kStaticDictionaryWords[offset + i];
80 const size_t l = w.len;
81 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
82 const size_t id = w.idx; 91 const size_t id = w.idx;
92 end = !!(w.len & 0x80);
93 w.len = (uint8_t)l;
83 if (w.transform == 0) { 94 if (w.transform == 0) {
84 const size_t matchlen = DictMatchLength(data, id, l, max_length); 95 const size_t matchlen = DictMatchLength(data, id, l, max_length);
85 // Transform "" + kIdentity + "" 96 const uint8_t* s;
97 size_t minlen;
98 size_t maxlen;
99 size_t len;
100 /* Transform "" + kIdentity + "" */
86 if (matchlen == l) { 101 if (matchlen == l) {
87 AddMatch(id, l, l, matches); 102 AddMatch(id, l, l, matches);
88 found_match = true; 103 has_found_match = BROTLI_TRUE;
89 } 104 }
90 // Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " 105 /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
91 if (matchlen >= l - 1) { 106 if (matchlen >= l - 1) {
92 AddMatch(id + 12 * n, l - 1, l, matches); 107 AddMatch(id + 12 * n, l - 1, l, matches);
93 if (l + 2 < max_length && 108 if (l + 2 < max_length &&
94 data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' && 109 data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
95 data[l + 2] == ' ') { 110 data[l + 2] == ' ') {
96 AddMatch(id + 49 * n, l + 3, l, matches); 111 AddMatch(id + 49 * n, l + 3, l, matches);
97 } 112 }
98 found_match = true; 113 has_found_match = BROTLI_TRUE;
99 } 114 }
100 // Transform "" + kOmitLastN + "" (N = 2 .. 9) 115 /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
101 size_t minlen = min_length; 116 minlen = min_length;
102 if (l > 9) minlen = std::max(minlen, l - 9); 117 if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
103 size_t maxlen = std::min(matchlen, l - 2); 118 maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
104 for (size_t len = minlen; len <= maxlen; ++len) { 119 for (len = minlen; len <= maxlen; ++len) {
105 AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches); 120 AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
106 found_match = true; 121 has_found_match = BROTLI_TRUE;
107 } 122 }
108 if (matchlen < l || l + 6 >= max_length) { 123 if (matchlen < l || l + 6 >= max_length) {
109 continue; 124 continue;
110 } 125 }
111 const uint8_t* s = &data[l]; 126 s = &data[l];
112 // Transforms "" + kIdentity + <suffix> 127 /* Transforms "" + kIdentity + <suffix> */
113 if (s[0] == ' ') { 128 if (s[0] == ' ') {
114 AddMatch(id + n, l + 1, l, matches); 129 AddMatch(id + n, l + 1, l, matches);
115 if (s[1] == 'a') { 130 if (s[1] == 'a') {
116 if (s[2] == ' ') { 131 if (s[2] == ' ') {
117 AddMatch(id + 28 * n, l + 3, l, matches); 132 AddMatch(id + 28 * n, l + 3, l, matches);
118 } else if (s[2] == 's') { 133 } else if (s[2] == 's') {
119 if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches); 134 if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
120 } else if (s[2] == 't') { 135 } else if (s[2] == 't') {
121 if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches); 136 if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
122 } else if (s[2] == 'n') { 137 } else if (s[2] == 'n') {
123 if (s[3] == 'd' && s[4] == ' ') { 138 if (s[3] == 'd' && s[4] == ' ') {
124 AddMatch(id + 10 * n, l + 5, l, matches); 139 AddMatch(id + 10 * n, l + 5, l, matches);
125 } 140 }
126 } 141 }
127 } else if (s[1] == 'b') { 142 } else if (s[1] == 'b') {
128 if (s[2] == 'y' && s[3] == ' ') { 143 if (s[2] == 'y' && s[3] == ' ') {
129 AddMatch(id + 38 * n, l + 4, l, matches); 144 AddMatch(id + 38 * n, l + 4, l, matches);
130 } 145 }
131 } else if (s[1] == 'i') { 146 } else if (s[1] == 'i') {
132 if (s[2] == 'n') { 147 if (s[2] == 'n') {
133 if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches); 148 if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
134 } else if (s[2] == 's') { 149 } else if (s[2] == 's') {
135 if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches); 150 if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
136 } 151 }
137 } else if (s[1] == 'f') { 152 } else if (s[1] == 'f') {
138 if (s[2] == 'o') { 153 if (s[2] == 'o') {
139 if (s[3] == 'r' && s[4] == ' ') { 154 if (s[3] == 'r' && s[4] == ' ') {
140 AddMatch(id + 25 * n, l + 5, l, matches); 155 AddMatch(id + 25 * n, l + 5, l, matches);
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
228 AddMatch(id + 95 * n, l + 4, l, matches); 243 AddMatch(id + 95 * n, l + 4, l, matches);
229 } 244 }
230 } 245 }
231 } else if (s[0] == 'f') { 246 } else if (s[0] == 'f') {
232 if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') { 247 if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
233 AddMatch(id + 90 * n, l + 4, l, matches); 248 AddMatch(id + 90 * n, l + 4, l, matches);
234 } 249 }
235 } else if (s[0] == 'i') { 250 } else if (s[0] == 'i') {
236 if (s[1] == 'v') { 251 if (s[1] == 'v') {
237 if (s[2] == 'e' && s[3] == ' ') { 252 if (s[2] == 'e' && s[3] == ' ') {
238 AddMatch(id + 92 * n, l + 4, l, matches); 253 AddMatch(id + 92 * n, l + 4, l, matches);
239 } 254 }
240 } else if (s[1] == 'z') { 255 } else if (s[1] == 'z') {
241 if (s[2] == 'e' && s[3] == ' ') { 256 if (s[2] == 'e' && s[3] == ' ') {
242 AddMatch(id + 100 * n, l + 4, l, matches); 257 AddMatch(id + 100 * n, l + 4, l, matches);
243 } 258 }
244 } 259 }
245 } else if (s[0] == 'l') { 260 } else if (s[0] == 'l') {
246 if (s[1] == 'e') { 261 if (s[1] == 'e') {
247 if (s[2] == 's' && s[3] == 's' && s[4] == ' ') { 262 if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
248 AddMatch(id + 93 * n, l + 5, l, matches); 263 AddMatch(id + 93 * n, l + 5, l, matches);
249 } 264 }
250 } else if (s[1] == 'y') { 265 } else if (s[1] == 'y') {
251 if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches); 266 if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
252 } 267 }
253 } else if (s[0] == 'o') { 268 } else if (s[0] == 'o') {
254 if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') { 269 if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
255 AddMatch(id + 106 * n, l + 4, l, matches); 270 AddMatch(id + 106 * n, l + 4, l, matches);
256 } 271 }
257 } 272 }
258 } else { 273 } else {
259 // Set t=false for kUppercaseFirst and 274 /* Set is_all_caps=0 for kUppercaseFirst and
260 // t=true otherwise (kUppercaseAll) transform. 275 is_all_caps=1 otherwise (kUppercaseAll) transform. */
261 const bool t = w.transform != kUppercaseFirst; 276 const BROTLI_BOOL is_all_caps =
277 TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
278 const uint8_t* s;
262 if (!IsMatch(w, data, max_length)) { 279 if (!IsMatch(w, data, max_length)) {
263 continue; 280 continue;
264 } 281 }
265 // Transform "" + kUppercase{First,All} + "" 282 /* Transform "" + kUppercase{First,All} + "" */
266 AddMatch(id + (t ? 44 : 9) * n, l, l, matches); 283 AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
267 found_match = true; 284 has_found_match = BROTLI_TRUE;
268 if (l + 1 >= max_length) { 285 if (l + 1 >= max_length) {
269 continue; 286 continue;
270 } 287 }
271 // Transforms "" + kUppercase{First,All} + <suffix> 288 /* Transforms "" + kUppercase{First,All} + <suffix> */
272 const uint8_t* s = &data[l]; 289 s = &data[l];
273 if (s[0] == ' ') { 290 if (s[0] == ' ') {
274 AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches); 291 AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
275 } else if (s[0] == '"') { 292 } else if (s[0] == '"') {
276 AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches); 293 AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
277 if (s[1] == '>') { 294 if (s[1] == '>') {
278 AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches); 295 AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
279 } 296 }
280 } else if (s[0] == '.') { 297 } else if (s[0] == '.') {
281 AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches); 298 AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
282 if (s[1] == ' ') { 299 if (s[1] == ' ') {
283 AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches); 300 AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
284 } 301 }
285 } else if (s[0] == ',') { 302 } else if (s[0] == ',') {
286 AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches); 303 AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
287 if (s[1] == ' ') { 304 if (s[1] == ' ') {
288 AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches); 305 AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
289 } 306 }
290 } else if (s[0] == '\'') { 307 } else if (s[0] == '\'') {
291 AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches); 308 AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
292 } else if (s[0] == '(') { 309 } else if (s[0] == '(') {
293 AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches); 310 AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
294 } else if (s[0] == '=') { 311 } else if (s[0] == '=') {
295 if (s[1] == '"') { 312 if (s[1] == '"') {
296 AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches); 313 AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
297 } else if (s[1] == '\'') { 314 } else if (s[1] == '\'') {
298 AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches); 315 AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
299 } 316 }
300 } 317 }
301 } 318 }
302 } 319 }
303 } 320 }
304 // Transforms with prefixes " " and "." 321 /* Transforms with prefixes " " and "." */
305 if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { 322 if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
306 bool is_space = (data[0] == ' '); 323 BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
307 key = Hash(&data[1]); 324 size_t offset = kStaticDictionaryBuckets[Hash(&data[1])];
308 bucket = kStaticDictionaryBuckets[key]; 325 BROTLI_BOOL end = !offset;
309 size_t num = bucket & 0xff; 326 while (!end) {
310 size_t offset = bucket >> 8; 327 DictWord w = kStaticDictionaryWords[offset++];
311 for (size_t i = 0; i < num; ++i) { 328 const size_t l = w.len & 0x7F;
312 const DictWord w = kStaticDictionaryWords[offset + i]; 329 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
313 const size_t l = w.len;
314 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
315 const size_t id = w.idx; 330 const size_t id = w.idx;
331 end = !!(w.len & 0x80);
332 w.len = (uint8_t)l;
316 if (w.transform == 0) { 333 if (w.transform == 0) {
334 const uint8_t* s;
317 if (!IsMatch(w, &data[1], max_length - 1)) { 335 if (!IsMatch(w, &data[1], max_length - 1)) {
318 continue; 336 continue;
319 } 337 }
320 // Transforms " " + kIdentity + "" and "." + kIdentity + "" 338 /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
321 AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); 339 AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
322 found_match = true; 340 has_found_match = BROTLI_TRUE;
323 if (l + 2 >= max_length) { 341 if (l + 2 >= max_length) {
324 continue; 342 continue;
325 } 343 }
326 // Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix> 344 /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
327 const uint8_t* s = &data[l + 1]; 345 */
346 s = &data[l + 1];
328 if (s[0] == ' ') { 347 if (s[0] == ' ') {
329 AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches); 348 AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
330 } else if (s[0] == '(') { 349 } else if (s[0] == '(') {
331 AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches); 350 AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
332 } else if (is_space) { 351 } else if (is_space) {
333 if (s[0] == ',') { 352 if (s[0] == ',') {
334 AddMatch(id + 103 * n, l + 2, l, matches); 353 AddMatch(id + 103 * n, l + 2, l, matches);
335 if (s[1] == ' ') { 354 if (s[1] == ' ') {
336 AddMatch(id + 33 * n, l + 3, l, matches); 355 AddMatch(id + 33 * n, l + 3, l, matches);
337 } 356 }
338 } else if (s[0] == '.') { 357 } else if (s[0] == '.') {
339 AddMatch(id + 71 * n, l + 2, l, matches); 358 AddMatch(id + 71 * n, l + 2, l, matches);
340 if (s[1] == ' ') { 359 if (s[1] == ' ') {
341 AddMatch(id + 52 * n, l + 3, l, matches); 360 AddMatch(id + 52 * n, l + 3, l, matches);
342 } 361 }
343 } else if (s[0] == '=') { 362 } else if (s[0] == '=') {
344 if (s[1] == '"') { 363 if (s[1] == '"') {
345 AddMatch(id + 81 * n, l + 3, l, matches); 364 AddMatch(id + 81 * n, l + 3, l, matches);
346 } else if (s[1] == '\'') { 365 } else if (s[1] == '\'') {
347 AddMatch(id + 98 * n, l + 3, l, matches); 366 AddMatch(id + 98 * n, l + 3, l, matches);
348 } 367 }
349 } 368 }
350 } 369 }
351 } else if (is_space) { 370 } else if (is_space) {
352 // Set t=false for kUppercaseFirst and 371 /* Set is_all_caps=0 for kUppercaseFirst and
353 // t=true otherwise (kUppercaseAll) transform. 372 is_all_caps=1 otherwise (kUppercaseAll) transform. */
354 const bool t = w.transform != kUppercaseFirst; 373 const BROTLI_BOOL is_all_caps =
374 TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
375 const uint8_t* s;
355 if (!IsMatch(w, &data[1], max_length - 1)) { 376 if (!IsMatch(w, &data[1], max_length - 1)) {
356 continue; 377 continue;
357 } 378 }
358 // Transforms " " + kUppercase{First,All} + "" 379 /* Transforms " " + kUppercase{First,All} + "" */
359 AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches); 380 AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
360 found_match = true; 381 has_found_match = BROTLI_TRUE;
361 if (l + 2 >= max_length) { 382 if (l + 2 >= max_length) {
362 continue; 383 continue;
363 } 384 }
364 // Transforms " " + kUppercase{First,All} + <suffix> 385 /* Transforms " " + kUppercase{First,All} + <suffix> */
365 const uint8_t* s = &data[l + 1]; 386 s = &data[l + 1];
366 if (s[0] == ' ') { 387 if (s[0] == ' ') {
367 AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches); 388 AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
368 } else if (s[0] == ',') { 389 } else if (s[0] == ',') {
369 if (!t) { 390 if (!is_all_caps) {
370 AddMatch(id + 109 * n, l + 2, l, matches); 391 AddMatch(id + 109 * n, l + 2, l, matches);
371 } 392 }
372 if (s[1] == ' ') { 393 if (s[1] == ' ') {
373 AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches); 394 AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
374 } 395 }
375 } else if (s[0] == '.') { 396 } else if (s[0] == '.') {
376 AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches); 397 AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
377 if (s[1] == ' ') { 398 if (s[1] == ' ') {
378 AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches); 399 AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
379 } 400 }
380 } else if (s[0] == '=') { 401 } else if (s[0] == '=') {
381 if (s[1] == '"') { 402 if (s[1] == '"') {
382 AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches); 403 AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
383 } else if (s[1] == '\'') { 404 } else if (s[1] == '\'') {
384 AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches); 405 AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
385 } 406 }
386 } 407 }
387 } 408 }
388 } 409 }
389 } 410 }
390 if (max_length >= 6) { 411 if (max_length >= 6) {
391 // Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" 412 /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */
392 if ((data[1] == ' ' && 413 if ((data[1] == ' ' &&
393 (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || 414 (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
394 (data[0] == 0xc2 && data[1] == 0xa0)) { 415 (data[0] == 0xc2 && data[1] == 0xa0)) {
395 key = Hash(&data[2]); 416 size_t offset = kStaticDictionaryBuckets[Hash(&data[2])];
396 bucket = kStaticDictionaryBuckets[key]; 417 BROTLI_BOOL end = !offset;
397 size_t num = bucket & 0xff; 418 while (!end) {
398 size_t offset = bucket >> 8; 419 DictWord w = kStaticDictionaryWords[offset++];
399 for (size_t i = 0; i < num; ++i) { 420 const size_t l = w.len & 0x7F;
400 const DictWord w = kStaticDictionaryWords[offset + i]; 421 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
401 const size_t l = w.len;
402 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
403 const size_t id = w.idx; 422 const size_t id = w.idx;
423 end = !!(w.len & 0x80);
424 w.len = (uint8_t)l;
404 if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) { 425 if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
405 if (data[0] == 0xc2) { 426 if (data[0] == 0xc2) {
406 AddMatch(id + 102 * n, l + 2, l, matches); 427 AddMatch(id + 102 * n, l + 2, l, matches);
407 found_match = true; 428 has_found_match = BROTLI_TRUE;
408 } else if (l + 2 < max_length && data[l + 2] == ' ') { 429 } else if (l + 2 < max_length && data[l + 2] == ' ') {
409 size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13); 430 size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
410 AddMatch(id + t * n, l + 3, l, matches); 431 AddMatch(id + t * n, l + 3, l, matches);
411 found_match = true; 432 has_found_match = BROTLI_TRUE;
412 } 433 }
413 } 434 }
414 } 435 }
415 } 436 }
416 } 437 }
417 if (max_length >= 9) { 438 if (max_length >= 9) {
418 // Transforms with prefixes " the " and ".com/" 439 /* Transforms with prefixes " the " and ".com/" */
419 if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' && 440 if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
420 data[3] == 'e' && data[4] == ' ') || 441 data[3] == 'e' && data[4] == ' ') ||
421 (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && 442 (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
422 data[3] == 'm' && data[4] == '/')) { 443 data[3] == 'm' && data[4] == '/')) {
423 key = Hash(&data[5]); 444 size_t offset = kStaticDictionaryBuckets[Hash(&data[5])];
424 bucket = kStaticDictionaryBuckets[key]; 445 BROTLI_BOOL end = !offset;
425 size_t num = bucket & 0xff; 446 while (!end) {
426 size_t offset = bucket >> 8; 447 DictWord w = kStaticDictionaryWords[offset++];
427 for (size_t i = 0; i < num; ++i) { 448 const size_t l = w.len & 0x7F;
428 const DictWord w = kStaticDictionaryWords[offset + i]; 449 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
429 const size_t l = w.len;
430 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
431 const size_t id = w.idx; 450 const size_t id = w.idx;
451 end = !!(w.len & 0x80);
452 w.len = (uint8_t)l;
432 if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) { 453 if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
433 AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); 454 AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
434 found_match = true; 455 has_found_match = BROTLI_TRUE;
435 if (l + 5 < max_length) { 456 if (l + 5 < max_length) {
436 const uint8_t* s = &data[l + 5]; 457 const uint8_t* s = &data[l + 5];
437 if (data[0] == ' ') { 458 if (data[0] == ' ') {
438 if (l + 8 < max_length && 459 if (l + 8 < max_length &&
439 s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') { 460 s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
440 AddMatch(id + 62 * n, l + 9, l, matches); 461 AddMatch(id + 62 * n, l + 9, l, matches);
441 if (l + 12 < max_length && 462 if (l + 12 < max_length &&
442 s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') { 463 s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
443 AddMatch(id + 73 * n, l + 13, l, matches); 464 AddMatch(id + 73 * n, l + 13, l, matches);
444 } 465 }
445 } 466 }
446 } 467 }
447 } 468 }
448 } 469 }
449 } 470 }
450 } 471 }
451 } 472 }
452 return found_match; 473 return has_found_match;
453 } 474 }
454 475
455 } // namespace brotli 476 #if defined(__cplusplus) || defined(c_plusplus)
477 } /* extern "C" */
478 #endif
OLDNEW
« no previous file with comments | « third_party/brotli/enc/static_dict.h ('k') | third_party/brotli/enc/static_dict.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698