OLD | NEW |
| (Empty) |
1 /* Copyright 2010 Google Inc. All Rights Reserved. | |
2 | |
3 Distributed under MIT license. | |
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | |
5 */ | |
6 | |
7 // Transformations on dictionary words. | |
8 | |
9 #ifndef BROTLI_ENC_TRANSFORM_H_ | |
10 #define BROTLI_ENC_TRANSFORM_H_ | |
11 | |
12 #include <string> | |
13 | |
14 #include "./dictionary.h" | |
15 | |
16 namespace brotli { | |
17 | |
// Word-level operation applied to a dictionary word, independent of the
// prefix/suffix strings of a Transform.
//
// The numeric values are significant: TransformWord() uses range comparisons
// on them and derives the number of bytes to drop directly from the value.
enum WordTransformType {
  // Leave the word unchanged (equivalently: omit the last 0 bytes).
  kIdentity = 0,

  // Drop the last N bytes of the word; the enumerator value equals N.
  kOmitLast1 = 1, kOmitLast2 = 2, kOmitLast3 = 3,
  kOmitLast4 = 4, kOmitLast5 = 5, kOmitLast6 = 6,
  kOmitLast7 = 7, kOmitLast8 = 8, kOmitLast9 = 9,

  // Uppercase the first character, or every character, of the word.
  kUppercaseFirst = 10,
  kUppercaseAll = 11,

  // Drop the first N bytes of the word; N == value - (kOmitFirst1 - 1).
  kOmitFirst1 = 12, kOmitFirst2 = 13, kOmitFirst3 = 14,
  kOmitFirst4 = 15, kOmitFirst5 = 16, kOmitFirst6 = 17,
  kOmitFirst7 = 18, kOmitFirst8 = 19, kOmitFirst9 = 20
};
41 | |
42 struct Transform { | |
43 const char* prefix; | |
44 WordTransformType word_transform; | |
45 const char* suffix; | |
46 }; | |
47 | |
48 static const Transform kTransforms[] = { | |
49 { "", kIdentity, "" }, | |
50 { "", kIdentity, " " }, | |
51 { " ", kIdentity, " " }, | |
52 { "", kOmitFirst1, "" }, | |
53 { "", kUppercaseFirst, " " }, | |
54 { "", kIdentity, " the " }, | |
55 { " ", kIdentity, "" }, | |
56 { "s ", kIdentity, " " }, | |
57 { "", kIdentity, " of " }, | |
58 { "", kUppercaseFirst, "" }, | |
59 { "", kIdentity, " and " }, | |
60 { "", kOmitFirst2, "" }, | |
61 { "", kOmitLast1, "" }, | |
62 { ", ", kIdentity, " " }, | |
63 { "", kIdentity, ", " }, | |
64 { " ", kUppercaseFirst, " " }, | |
65 { "", kIdentity, " in " }, | |
66 { "", kIdentity, " to " }, | |
67 { "e ", kIdentity, " " }, | |
68 { "", kIdentity, "\"" }, | |
69 { "", kIdentity, "." }, | |
70 { "", kIdentity, "\">" }, | |
71 { "", kIdentity, "\n" }, | |
72 { "", kOmitLast3, "" }, | |
73 { "", kIdentity, "]" }, | |
74 { "", kIdentity, " for " }, | |
75 { "", kOmitFirst3, "" }, | |
76 { "", kOmitLast2, "" }, | |
77 { "", kIdentity, " a " }, | |
78 { "", kIdentity, " that " }, | |
79 { " ", kUppercaseFirst, "" }, | |
80 { "", kIdentity, ". " }, | |
81 { ".", kIdentity, "" }, | |
82 { " ", kIdentity, ", " }, | |
83 { "", kOmitFirst4, "" }, | |
84 { "", kIdentity, " with " }, | |
85 { "", kIdentity, "'" }, | |
86 { "", kIdentity, " from " }, | |
87 { "", kIdentity, " by " }, | |
88 { "", kOmitFirst5, "" }, | |
89 { "", kOmitFirst6, "" }, | |
90 { " the ", kIdentity, "" }, | |
91 { "", kOmitLast4, "" }, | |
92 { "", kIdentity, ". The " }, | |
93 { "", kUppercaseAll, "" }, | |
94 { "", kIdentity, " on " }, | |
95 { "", kIdentity, " as " }, | |
96 { "", kIdentity, " is " }, | |
97 { "", kOmitLast7, "" }, | |
98 { "", kOmitLast1, "ing " }, | |
99 { "", kIdentity, "\n\t" }, | |
100 { "", kIdentity, ":" }, | |
101 { " ", kIdentity, ". " }, | |
102 { "", kIdentity, "ed " }, | |
103 { "", kOmitFirst9, "" }, | |
104 { "", kOmitFirst7, "" }, | |
105 { "", kOmitLast6, "" }, | |
106 { "", kIdentity, "(" }, | |
107 { "", kUppercaseFirst, ", " }, | |
108 { "", kOmitLast8, "" }, | |
109 { "", kIdentity, " at " }, | |
110 { "", kIdentity, "ly " }, | |
111 { " the ", kIdentity, " of " }, | |
112 { "", kOmitLast5, "" }, | |
113 { "", kOmitLast9, "" }, | |
114 { " ", kUppercaseFirst, ", " }, | |
115 { "", kUppercaseFirst, "\"" }, | |
116 { ".", kIdentity, "(" }, | |
117 { "", kUppercaseAll, " " }, | |
118 { "", kUppercaseFirst, "\">" }, | |
119 { "", kIdentity, "=\"" }, | |
120 { " ", kIdentity, "." }, | |
121 { ".com/", kIdentity, "" }, | |
122 { " the ", kIdentity, " of the " }, | |
123 { "", kUppercaseFirst, "'" }, | |
124 { "", kIdentity, ". This " }, | |
125 { "", kIdentity, "," }, | |
126 { ".", kIdentity, " " }, | |
127 { "", kUppercaseFirst, "(" }, | |
128 { "", kUppercaseFirst, "." }, | |
129 { "", kIdentity, " not " }, | |
130 { " ", kIdentity, "=\"" }, | |
131 { "", kIdentity, "er " }, | |
132 { " ", kUppercaseAll, " " }, | |
133 { "", kIdentity, "al " }, | |
134 { " ", kUppercaseAll, "" }, | |
135 { "", kIdentity, "='" }, | |
136 { "", kUppercaseAll, "\"" }, | |
137 { "", kUppercaseFirst, ". " }, | |
138 { " ", kIdentity, "(" }, | |
139 { "", kIdentity, "ful " }, | |
140 { " ", kUppercaseFirst, ". " }, | |
141 { "", kIdentity, "ive " }, | |
142 { "", kIdentity, "less " }, | |
143 { "", kUppercaseAll, "'" }, | |
144 { "", kIdentity, "est " }, | |
145 { " ", kUppercaseFirst, "." }, | |
146 { "", kUppercaseAll, "\">" }, | |
147 { " ", kIdentity, "='" }, | |
148 { "", kUppercaseFirst, "," }, | |
149 { "", kIdentity, "ize " }, | |
150 { "", kUppercaseAll, "." }, | |
151 { "\xc2\xa0", kIdentity, "" }, | |
152 { " ", kIdentity, "," }, | |
153 { "", kUppercaseFirst, "=\"" }, | |
154 { "", kUppercaseAll, "=\"" }, | |
155 { "", kIdentity, "ous " }, | |
156 { "", kUppercaseAll, ", " }, | |
157 { "", kUppercaseFirst, "='" }, | |
158 { " ", kUppercaseFirst, "," }, | |
159 { " ", kUppercaseAll, "=\"" }, | |
160 { " ", kUppercaseAll, ", " }, | |
161 { "", kUppercaseAll, "," }, | |
162 { "", kUppercaseAll, "(" }, | |
163 { "", kUppercaseAll, ". " }, | |
164 { " ", kUppercaseAll, "." }, | |
165 { "", kUppercaseAll, "='" }, | |
166 { " ", kUppercaseAll, ". " }, | |
167 { " ", kUppercaseFirst, "=\"" }, | |
168 { " ", kUppercaseAll, "='" }, | |
169 { " ", kUppercaseFirst, "='" }, | |
170 }; | |
171 | |
172 static const size_t kNumTransforms = | |
173 sizeof(kTransforms) / sizeof(kTransforms[0]); | |
174 | |
// kOmitLastNTransforms[n] is the index in kTransforms of the entry with an
// empty prefix, an empty suffix and a word transform that drops the last n
// bytes (n == 0 is the identity entry).
static const size_t kOmitLastNTransforms[10] = {
  0,   // kIdentity
  12,  // kOmitLast1
  27,  // kOmitLast2
  23,  // kOmitLast3
  42,  // kOmitLast4
  63,  // kOmitLast5
  56,  // kOmitLast6
  48,  // kOmitLast7
  59,  // kOmitLast8
  64,  // kOmitLast9
};
178 | |
// Uppercases, in place, the single character that starts at p, using a
// deliberately simplified model of UTF-8 that looks only at the lead byte.
//
//   p    - buffer holding the character; modified in place.
//   len  - number of bytes available at p.
//
// Returns the number of bytes the character occupies (how far the caller
// should advance), or 0 when len is 0.
//
// Rules:
//   * lead byte < 0xc0, or only one byte available: one-byte character;
//     'a'..'z' is flipped to 'A'..'Z', anything else is left untouched.
//   * lead byte < 0xe0: two-byte character; bit 5 of the second byte is
//     toggled.
//   * otherwise: three-byte character; the third byte is XOR-ed with 5.
//     If only two bytes are available nothing is modified and 2 is returned.
static size_t ToUpperCase(uint8_t *p, size_t len) {
  // An empty buffer has no lead byte to inspect; never dereference p.
  if (len == 0) {
    return 0;
  }
  if (len == 1 || p[0] < 0xc0) {
    if (p[0] >= 'a' && p[0] <= 'z') {
      p[0] ^= 32;
    }
    return 1;
  }
  // From here on at least two bytes are available.
  if (p[0] < 0xe0) {
    p[1] ^= 32;
    return 2;
  }
  // Truncated three-byte character: consume what is there, change nothing.
  if (len == 2) {
    return 2;
  }
  p[2] ^= 5;
  return 3;
}
196 | |
197 inline std::string TransformWord( | |
198 WordTransformType transform_type, const uint8_t* word, size_t len) { | |
199 if (transform_type <= kOmitLast9) { | |
200 if (len <= static_cast<size_t>(transform_type)) { | |
201 return std::string(); | |
202 } | |
203 return std::string(word, word + len - transform_type); | |
204 } | |
205 | |
206 if (transform_type >= kOmitFirst1) { | |
207 const size_t skip = transform_type - (kOmitFirst1 - 1); | |
208 if (len <= skip) { | |
209 return std::string(); | |
210 } | |
211 return std::string(word + skip, word + len); | |
212 } | |
213 | |
214 std::string ret = std::string(word, word + len); | |
215 uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[0]); | |
216 if (transform_type == kUppercaseFirst) { | |
217 ToUpperCase(uppercase, len); | |
218 } else if (transform_type == kUppercaseAll) { | |
219 size_t position = 0; | |
220 while (position < len) { | |
221 size_t step = ToUpperCase(uppercase, len - position); | |
222 uppercase += step; | |
223 position += step; | |
224 } | |
225 } | |
226 return ret; | |
227 } | |
228 | |
229 inline std::string ApplyTransform( | |
230 const Transform& t, const uint8_t* word, size_t len) { | |
231 return std::string(t.prefix) + | |
232 TransformWord(t.word_transform, word, len) + std::string(t.suffix); | |
233 } | |
234 | |
235 inline std::string GetTransformedDictionaryWord(size_t len_code, | |
236 size_t word_id) { | |
237 size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code]; | |
238 size_t offset = kBrotliDictionaryOffsetsByLength[len_code]; | |
239 size_t t = word_id / num_words; | |
240 size_t word_idx = word_id % num_words; | |
241 offset += len_code * word_idx; | |
242 const uint8_t* word = &kBrotliDictionary[offset]; | |
243 return ApplyTransform(kTransforms[t], word, len_code); | |
244 } | |
245 | |
246 } // namespace brotli | |
247 | |
248 #endif // BROTLI_ENC_TRANSFORM_H_ | |
OLD | NEW |