third_party/brotli/enc/static_dict.cc - Issue 1956893002: Added brotli enc/ and tools/ directories.

Side by Side Diff: third_party/brotli/enc/static_dict.cc

Issue 1956893002: Added brotli enc/ and tools/ directories. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Updated to most recent build tools Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /* Copyright 2013 Google Inc. All Rights Reserved.

	2

	3 Distributed under MIT license.

	4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT

	5 */

	6

	7 #include "./static_dict.h"

	8

	9 #include <algorithm>

	10

	11 #include "./dictionary.h"

	12 #include "./find_match_length.h"

	13 #include "./static_dict_lut.h"

	14 #include "./transform.h"

	15

	16 namespace brotli {

	17

	18 inline uint32_t Hash(const uint8_t *data) {

	19 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;

	20 // The higher bits contain more mixture from the multiplication,

	21 // so we take our results from there.

	22 return h >> (32 - kDictNumBits);

	23 }

	24

	25 inline void AddMatch(size_t distance, size_t len, size_t len_code,

	26 uint32_t* matches) {

	27 uint32_t match = static_cast<uint32_t>((distance << 5) + len_code);

	28 matches[len] = std::min(matches[len], match);

	29 }

	30

	31 inline size_t DictMatchLength(const uint8_t* data,

	32 size_t id,

	33 size_t len,

	34 size_t maxlen) {

	35 const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;

	36 return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,

	37 std::min(len, maxlen));

	38 }

	39

	40 inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {

	41 if (w.len > max_length) return false;

	42 const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;

	43 const uint8_t* dict = &kBrotliDictionary[offset];

	44 if (w.transform == 0) {

	45 // Match against base dictionary word.

	46 return FindMatchLengthWithLimit(dict, data, w.len) == w.len;

	47 } else if (w.transform == 10) {

	48 // Match against uppercase first transform.

	49 // Note that there are only ASCII uppercase words in the lookup table.

	50 return (dict[0] >= 'a' && dict[0] <= 'z' &&

	51 (dict[0] ^ 32) == data[0] &&

	52 FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==

	53 w.len - 1u);

	54 } else {

	55 // Match against uppercase all transform.

	56 // Note that there are only ASCII uppercase words in the lookup table.

	57 for (size_t i = 0; i < w.len; ++i) {

	58 if (dict[i] >= 'a' && dict[i] <= 'z') {

	59 if ((dict[i] ^ 32) != data[i]) return false;

	60 } else {

	61 if (dict[i] != data[i]) return false;

	62 }

	63 }

	64 return true;

	65 }

	66 }

	67

	68 bool FindAllStaticDictionaryMatches(const uint8_t* data,

	69 size_t min_length,

	70 size_t max_length,

	71 uint32_t* matches) {

	72 bool found_match = false;

	73 size_t key = Hash(data);

	74 size_t bucket = kStaticDictionaryBuckets[key];

	75 if (bucket != 0) {

	76 size_t num = bucket & 0xff;

	77 size_t offset = bucket >> 8;

	78 for (size_t i = 0; i < num; ++i) {

	79 const DictWord w = kStaticDictionaryWords[offset + i];

	80 const size_t l = w.len;

	81 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];

	82 const size_t id = w.idx;

	83 if (w.transform == 0) {

	84 const size_t matchlen = DictMatchLength(data, id, l, max_length);

	85 // Transform "" + kIdentity + ""

	86 if (matchlen == l) {

	87 AddMatch(id, l, l, matches);

	88 found_match = true;

	89 }

	90 // Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing "

	91 if (matchlen >= l - 1) {

	92 AddMatch(id + 12 * n, l - 1, l, matches);

	93 if (l + 2 < max_length &&

	94 data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&

	95 data[l + 2] == ' ') {

	96 AddMatch(id + 49 * n, l + 3, l, matches);

	97 }

	98 found_match = true;

	99 }

	100 // Transform "" + kOmitLastN + "" (N = 2 .. 9)

	101 size_t minlen = min_length;

	102 if (l > 9) minlen = std::max(minlen, l - 9);

	103 size_t maxlen = std::min(matchlen, l - 2);

	104 for (size_t len = minlen; len <= maxlen; ++len) {

	105 AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);

	106 found_match = true;

	107 }

	108 if (matchlen < l \|\| l + 6 >= max_length) {

	109 continue;

	110 }

	111 const uint8_t* s = &data[l];

	112 // Transforms "" + kIdentity + <suffix>

	113 if (s[0] == ' ') {

	114 AddMatch(id + n, l + 1, l, matches);

	115 if (s[1] == 'a') {

	116 if (s[2] == ' ') {

	117 AddMatch(id + 28 * n, l + 3, l, matches);

	118 } else if (s[2] == 's') {

	119 if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);

	120 } else if (s[2] == 't') {

	121 if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);

	122 } else if (s[2] == 'n') {

	123 if (s[3] == 'd' && s[4] == ' ') {

	124 AddMatch(id + 10 * n, l + 5, l, matches);

	125 }

	126 }

	127 } else if (s[1] == 'b') {

	128 if (s[2] == 'y' && s[3] == ' ') {

	129 AddMatch(id + 38 * n, l + 4, l, matches);

	130 }

	131 } else if (s[1] == 'i') {

	132 if (s[2] == 'n') {

	133 if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);

	134 } else if (s[2] == 's') {

	135 if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);

	136 }

	137 } else if (s[1] == 'f') {

	138 if (s[2] == 'o') {

	139 if (s[3] == 'r' && s[4] == ' ') {

	140 AddMatch(id + 25 * n, l + 5, l, matches);

	141 }

	142 } else if (s[2] == 'r') {

	143 if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {

	144 AddMatch(id + 37 * n, l + 6, l, matches);

	145 }

	146 }

	147 } else if (s[1] == 'o') {

	148 if (s[2] == 'f') {

	149 if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);

	150 } else if (s[2] == 'n') {

	151 if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);

	152 }

	153 } else if (s[1] == 'n') {

	154 if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {

	155 AddMatch(id + 80 * n, l + 5, l, matches);

	156 }

	157 } else if (s[1] == 't') {

	158 if (s[2] == 'h') {

	159 if (s[3] == 'e') {

	160 if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);

	161 } else if (s[3] == 'a') {

	162 if (s[4] == 't' && s[5] == ' ') {

	163 AddMatch(id + 29 * n, l + 6, l, matches);

	164 }

	165 }

	166 } else if (s[2] == 'o') {

	167 if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);

	168 }

	169 } else if (s[1] == 'w') {

	170 if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {

	171 AddMatch(id + 35 * n, l + 6, l, matches);

	172 }

	173 }

	174 } else if (s[0] == '"') {

	175 AddMatch(id + 19 * n, l + 1, l, matches);

	176 if (s[1] == '>') {

	177 AddMatch(id + 21 * n, l + 2, l, matches);

	178 }

	179 } else if (s[0] == '.') {

	180 AddMatch(id + 20 * n, l + 1, l, matches);

	181 if (s[1] == ' ') {

	182 AddMatch(id + 31 * n, l + 2, l, matches);

	183 if (s[2] == 'T' && s[3] == 'h') {

	184 if (s[4] == 'e') {

	185 if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);

	186 } else if (s[4] == 'i') {

	187 if (s[5] == 's' && s[6] == ' ') {

	188 AddMatch(id + 75 * n, l + 7, l, matches);

	189 }

	190 }

	191 }

	192 }

	193 } else if (s[0] == ',') {

	194 AddMatch(id + 76 * n, l + 1, l, matches);

	195 if (s[1] == ' ') {

	196 AddMatch(id + 14 * n, l + 2, l, matches);

	197 }

	198 } else if (s[0] == '\n') {

	199 AddMatch(id + 22 * n, l + 1, l, matches);

	200 if (s[1] == '\t') {

	201 AddMatch(id + 50 * n, l + 2, l, matches);

	202 }

	203 } else if (s[0] == ']') {

	204 AddMatch(id + 24 * n, l + 1, l, matches);

	205 } else if (s[0] == '\'') {

	206 AddMatch(id + 36 * n, l + 1, l, matches);

	207 } else if (s[0] == ':') {

	208 AddMatch(id + 51 * n, l + 1, l, matches);

	209 } else if (s[0] == '(') {

	210 AddMatch(id + 57 * n, l + 1, l, matches);

	211 } else if (s[0] == '=') {

	212 if (s[1] == '"') {

	213 AddMatch(id + 70 * n, l + 2, l, matches);

	214 } else if (s[1] == '\'') {

	215 AddMatch(id + 86 * n, l + 2, l, matches);

	216 }

	217 } else if (s[0] == 'a') {

	218 if (s[1] == 'l' && s[2] == ' ') {

	219 AddMatch(id + 84 * n, l + 3, l, matches);

	220 }

	221 } else if (s[0] == 'e') {

	222 if (s[1] == 'd') {

	223 if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);

	224 } else if (s[1] == 'r') {

	225 if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);

	226 } else if (s[1] == 's') {

	227 if (s[2] == 't' && s[3] == ' ') {

	228 AddMatch(id + 95 * n, l + 4, l, matches);

	229 }

	230 }

	231 } else if (s[0] == 'f') {

	232 if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {

	233 AddMatch(id + 90 * n, l + 4, l, matches);

	234 }

	235 } else if (s[0] == 'i') {

	236 if (s[1] == 'v') {

	237 if (s[2] == 'e' && s[3] == ' ') {

	238 AddMatch(id + 92 * n, l + 4, l, matches);

	239 }

	240 } else if (s[1] == 'z') {

	241 if (s[2] == 'e' && s[3] == ' ') {

	242 AddMatch(id + 100 * n, l + 4, l, matches);

	243 }

	244 }

	245 } else if (s[0] == 'l') {

	246 if (s[1] == 'e') {

	247 if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {

	248 AddMatch(id + 93 * n, l + 5, l, matches);

	249 }

	250 } else if (s[1] == 'y') {

	251 if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);

	252 }

	253 } else if (s[0] == 'o') {

	254 if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {

	255 AddMatch(id + 106 * n, l + 4, l, matches);

	256 }

	257 }

	258 } else {

	259 // Set t=false for kUppercaseFirst and

	260 // t=true otherwise (kUppercaseAll) transform.

	261 const bool t = w.transform != kUppercaseFirst;

	262 if (!IsMatch(w, data, max_length)) {

	263 continue;

	264 }

	265 // Transform "" + kUppercase{First,All} + ""

	266 AddMatch(id + (t ? 44 : 9) * n, l, l, matches);

	267 found_match = true;

	268 if (l + 1 >= max_length) {

	269 continue;

	270 }

	271 // Transforms "" + kUppercase{First,All} + <suffix>

	272 const uint8_t* s = &data[l];

	273 if (s[0] == ' ') {

	274 AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);

	275 } else if (s[0] == '"') {

	276 AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches);

	277 if (s[1] == '>') {

	278 AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches);

	279 }

	280 } else if (s[0] == '.') {

	281 AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches);

	282 if (s[1] == ' ') {

	283 AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches);

	284 }

	285 } else if (s[0] == ',') {

	286 AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches);

	287 if (s[1] == ' ') {

	288 AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches);

	289 }

	290 } else if (s[0] == '\'') {

	291 AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches);

	292 } else if (s[0] == '(') {

	293 AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches);

	294 } else if (s[0] == '=') {

	295 if (s[1] == '"') {

	296 AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches);

	297 } else if (s[1] == '\'') {

	298 AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches);

	299 }

	300 }

	301 }

	302 }

	303 }

	304 // Transforms with prefixes " " and "."

	305 if (max_length >= 5 && (data[0] == ' ' \|\| data[0] == '.')) {

	306 bool is_space = (data[0] == ' ');

	307 key = Hash(&data[1]);

	308 bucket = kStaticDictionaryBuckets[key];

	309 size_t num = bucket & 0xff;

	310 size_t offset = bucket >> 8;

	311 for (size_t i = 0; i < num; ++i) {

	312 const DictWord w = kStaticDictionaryWords[offset + i];

	313 const size_t l = w.len;

	314 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];

	315 const size_t id = w.idx;

	316 if (w.transform == 0) {

	317 if (!IsMatch(w, &data[1], max_length - 1)) {

	318 continue;

	319 }

	320 // Transforms " " + kIdentity + "" and "." + kIdentity + ""

	321 AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);

	322 found_match = true;

	323 if (l + 2 >= max_length) {

	324 continue;

	325 }

	326 // Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>

	327 const uint8_t* s = &data[l + 1];

	328 if (s[0] == ' ') {

	329 AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);

	330 } else if (s[0] == '(') {

	331 AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);

	332 } else if (is_space) {

	333 if (s[0] == ',') {

	334 AddMatch(id + 103 * n, l + 2, l, matches);

	335 if (s[1] == ' ') {

	336 AddMatch(id + 33 * n, l + 3, l, matches);

	337 }

	338 } else if (s[0] == '.') {

	339 AddMatch(id + 71 * n, l + 2, l, matches);

	340 if (s[1] == ' ') {

	341 AddMatch(id + 52 * n, l + 3, l, matches);

	342 }

	343 } else if (s[0] == '=') {

	344 if (s[1] == '"') {

	345 AddMatch(id + 81 * n, l + 3, l, matches);

	346 } else if (s[1] == '\'') {

	347 AddMatch(id + 98 * n, l + 3, l, matches);

	348 }

	349 }

	350 }

	351 } else if (is_space) {

	352 // Set t=false for kUppercaseFirst and

	353 // t=true otherwise (kUppercaseAll) transform.

	354 const bool t = w.transform != kUppercaseFirst;

	355 if (!IsMatch(w, &data[1], max_length - 1)) {

	356 continue;

	357 }

	358 // Transforms " " + kUppercase{First,All} + ""

	359 AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);

	360 found_match = true;

	361 if (l + 2 >= max_length) {

	362 continue;

	363 }

	364 // Transforms " " + kUppercase{First,All} + <suffix>

	365 const uint8_t* s = &data[l + 1];

	366 if (s[0] == ' ') {

	367 AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);

	368 } else if (s[0] == ',') {

	369 if (!t) {

	370 AddMatch(id + 109 * n, l + 2, l, matches);

	371 }

	372 if (s[1] == ' ') {

	373 AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches);

	374 }

	375 } else if (s[0] == '.') {

	376 AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches);

	377 if (s[1] == ' ') {

	378 AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches);

	379 }

	380 } else if (s[0] == '=') {

	381 if (s[1] == '"') {

	382 AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches);

	383 } else if (s[1] == '\'') {

	384 AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches);

	385 }

	386 }

	387 }

	388 }

	389 }

	390 if (max_length >= 6) {

	391 // Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0"

	392 if ((data[1] == ' ' &&

	393 (data[0] == 'e' \|\| data[0] == 's' \|\| data[0] == ',')) \|\|

	394 (data[0] == 0xc2 && data[1] == 0xa0)) {

	395 key = Hash(&data[2]);

	396 bucket = kStaticDictionaryBuckets[key];

	397 size_t num = bucket & 0xff;

	398 size_t offset = bucket >> 8;

	399 for (size_t i = 0; i < num; ++i) {

	400 const DictWord w = kStaticDictionaryWords[offset + i];

	401 const size_t l = w.len;

	402 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];

	403 const size_t id = w.idx;

	404 if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {

	405 if (data[0] == 0xc2) {

	406 AddMatch(id + 102 * n, l + 2, l, matches);

	407 found_match = true;

	408 } else if (l + 2 < max_length && data[l + 2] == ' ') {

	409 size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);

	410 AddMatch(id + t * n, l + 3, l, matches);

	411 found_match = true;

	412 }

	413 }

	414 }

	415 }

	416 }

	417 if (max_length >= 9) {

	418 // Transforms with prefixes " the " and ".com/"

	419 if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&

	420 data[3] == 'e' && data[4] == ' ') \|\|

	421 (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&

	422 data[3] == 'm' && data[4] == '/')) {

	423 key = Hash(&data[5]);

	424 bucket = kStaticDictionaryBuckets[key];

	425 size_t num = bucket & 0xff;

	426 size_t offset = bucket >> 8;

	427 for (size_t i = 0; i < num; ++i) {

	428 const DictWord w = kStaticDictionaryWords[offset + i];

	429 const size_t l = w.len;

	430 const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];

	431 const size_t id = w.idx;

	432 if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {

	433 AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);

	434 found_match = true;

	435 if (l + 5 < max_length) {

	436 const uint8_t* s = &data[l + 5];

	437 if (data[0] == ' ') {

	438 if (l + 8 < max_length &&

	439 s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {

	440 AddMatch(id + 62 * n, l + 9, l, matches);

	441 if (l + 12 < max_length &&

	442 s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {

	443 AddMatch(id + 73 * n, l + 13, l, matches);

	444 }

	445 }

	446 }

	447 }

	448 }

	449 }

	450 }

	451 }

	452 return found_match;

	453 }

	454

	455 } // namespace brotli

OLD	NEW

« no previous file with comments | « third_party/brotli/enc/static_dict.h ('k') | third_party/brotli/enc/static_dict_lut.h » ('j') | no next file with comments »