/* Copyright 2013 Google Inc. All Rights Reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

   Transformations on dictionary words.
*/

18 #ifndef BROTLI_DEC_TRANSFORM_H_ | |
19 #define BROTLI_DEC_TRANSFORM_H_ | |
20 | |
21 #include <stdio.h> | |
22 #include <ctype.h> | |
23 #include "./types.h" | |
24 | |
25 #if defined(__cplusplus) || defined(c_plusplus) | |
26 extern "C" { | |
27 #endif | |
28 | |
/* Kinds of edits that can be applied to a dictionary word.

   The numeric values are load-bearing: TransformDictionaryWord() uses them
   arithmetically, so they must not be renumbered.
     - kIdentity and kOmitLastN occupy 0..9; the value is the number of
       bytes dropped from the end of the word.
     - kOmitFirstN occupy 12..20; the number of bytes dropped from the start
       of the word is (value - (kOmitFirst1 - 1)). */
enum WordTransformType {
  kIdentity       = 0,
  kOmitLast1      = 1,
  kOmitLast2      = 2,
  kOmitLast3      = 3,
  kOmitLast4      = 4,
  kOmitLast5      = 5,
  kOmitLast6      = 6,
  kOmitLast7      = 7,
  kOmitLast8      = 8,
  kOmitLast9      = 9,
  kUppercaseFirst = 10,
  kUppercaseAll   = 11,
  kOmitFirst1     = 12,
  kOmitFirst2     = 13,
  kOmitFirst3     = 14,
  kOmitFirst4     = 15,
  kOmitFirst5     = 16,
  kOmitFirst6     = 17,
  kOmitFirst7     = 18,
  kOmitFirst8     = 19,
  kOmitFirst9     = 20
};

53 typedef struct { | |
54 const char* prefix; | |
55 enum WordTransformType transform; | |
56 const char* suffix; | |
57 } Transform; | |
58 | |
59 static const Transform kTransforms[] = { | |
60 { "", kIdentity, "" }, | |
61 { "", kIdentity, " " }, | |
62 { " ", kIdentity, " " }, | |
63 { "", kOmitFirst1, "" }, | |
64 { "", kUppercaseFirst, " " }, | |
65 { "", kIdentity, " the " }, | |
66 { " ", kIdentity, "" }, | |
67 { "s ", kIdentity, " " }, | |
68 { "", kIdentity, " of " }, | |
69 { "", kUppercaseFirst, "" }, | |
70 { "", kIdentity, " and " }, | |
71 { "", kOmitFirst2, "" }, | |
72 { "", kOmitLast1, "" }, | |
73 { ", ", kIdentity, " " }, | |
74 { "", kIdentity, ", " }, | |
75 { " ", kUppercaseFirst, " " }, | |
76 { "", kIdentity, " in " }, | |
77 { "", kIdentity, " to " }, | |
78 { "e ", kIdentity, " " }, | |
79 { "", kIdentity, "\"" }, | |
80 { "", kIdentity, "." }, | |
81 { "", kIdentity, "\">" }, | |
82 { "", kIdentity, "\n" }, | |
83 { "", kOmitLast3, "" }, | |
84 { "", kIdentity, "]" }, | |
85 { "", kIdentity, " for " }, | |
86 { "", kOmitFirst3, "" }, | |
87 { "", kOmitLast2, "" }, | |
88 { "", kIdentity, " a " }, | |
89 { "", kIdentity, " that " }, | |
90 { " ", kUppercaseFirst, "" }, | |
91 { "", kIdentity, ". " }, | |
92 { ".", kIdentity, "" }, | |
93 { " ", kIdentity, ", " }, | |
94 { "", kOmitFirst4, "" }, | |
95 { "", kIdentity, " with " }, | |
96 { "", kIdentity, "'" }, | |
97 { "", kIdentity, " from " }, | |
98 { "", kIdentity, " by " }, | |
99 { "", kOmitFirst5, "" }, | |
100 { "", kOmitFirst6, "" }, | |
101 { " the ", kIdentity, "" }, | |
102 { "", kOmitLast4, "" }, | |
103 { "", kIdentity, ". The " }, | |
104 { "", kUppercaseAll, "" }, | |
105 { "", kIdentity, " on " }, | |
106 { "", kIdentity, " as " }, | |
107 { "", kIdentity, " is " }, | |
108 { "", kOmitLast7, "" }, | |
109 { "", kOmitLast1, "ing " }, | |
110 { "", kIdentity, "\n\t" }, | |
111 { "", kIdentity, ":" }, | |
112 { " ", kIdentity, ". " }, | |
113 { "", kIdentity, "ed " }, | |
114 { "", kOmitFirst9, "" }, | |
115 { "", kOmitFirst7, "" }, | |
116 { "", kOmitLast6, "" }, | |
117 { "", kIdentity, "(" }, | |
118 { "", kUppercaseFirst, ", " }, | |
119 { "", kOmitLast8, "" }, | |
120 { "", kIdentity, " at " }, | |
121 { "", kIdentity, "ly " }, | |
122 { " the ", kIdentity, " of " }, | |
123 { "", kOmitLast5, "" }, | |
124 { "", kOmitLast9, "" }, | |
125 { " ", kUppercaseFirst, ", " }, | |
126 { "", kUppercaseFirst, "\"" }, | |
127 { ".", kIdentity, "(" }, | |
128 { "", kUppercaseAll, " " }, | |
129 { "", kUppercaseFirst, "\">" }, | |
130 { "", kIdentity, "=\"" }, | |
131 { " ", kIdentity, "." }, | |
132 { ".com/", kIdentity, "" }, | |
133 { " the ", kIdentity, " of the " }, | |
134 { "", kUppercaseFirst, "'" }, | |
135 { "", kIdentity, ". This " }, | |
136 { "", kIdentity, "," }, | |
137 { ".", kIdentity, " " }, | |
138 { "", kUppercaseFirst, "(" }, | |
139 { "", kUppercaseFirst, "." }, | |
140 { "", kIdentity, " not " }, | |
141 { " ", kIdentity, "=\"" }, | |
142 { "", kIdentity, "er " }, | |
143 { " ", kUppercaseAll, " " }, | |
144 { "", kIdentity, "al " }, | |
145 { " ", kUppercaseAll, "" }, | |
146 { "", kIdentity, "='" }, | |
147 { "", kUppercaseAll, "\"" }, | |
148 { "", kUppercaseFirst, ". " }, | |
149 { " ", kIdentity, "(" }, | |
150 { "", kIdentity, "ful " }, | |
151 { " ", kUppercaseFirst, ". " }, | |
152 { "", kIdentity, "ive " }, | |
153 { "", kIdentity, "less " }, | |
154 { "", kUppercaseAll, "'" }, | |
155 { "", kIdentity, "est " }, | |
156 { " ", kUppercaseFirst, "." }, | |
157 { "", kUppercaseAll, "\">" }, | |
158 { " ", kIdentity, "='" }, | |
159 { "", kUppercaseFirst, "," }, | |
160 { "", kIdentity, "ize " }, | |
161 { "", kUppercaseAll, "." }, | |
162 { "\xc2\xa0", kIdentity, "" }, | |
163 { " ", kIdentity, "," }, | |
164 { "", kUppercaseFirst, "=\"" }, | |
165 { "", kUppercaseAll, "=\"" }, | |
166 { "", kIdentity, "ous " }, | |
167 { "", kUppercaseAll, ", " }, | |
168 { "", kUppercaseFirst, "='" }, | |
169 { " ", kUppercaseFirst, "," }, | |
170 { " ", kUppercaseAll, "=\"" }, | |
171 { " ", kUppercaseAll, ", " }, | |
172 { "", kUppercaseAll, "," }, | |
173 { "", kUppercaseAll, "(" }, | |
174 { "", kUppercaseAll, ". " }, | |
175 { " ", kUppercaseAll, "." }, | |
176 { "", kUppercaseAll, "='" }, | |
177 { " ", kUppercaseAll, ". " }, | |
178 { " ", kUppercaseFirst, "=\"" }, | |
179 { " ", kUppercaseAll, "='" }, | |
180 { " ", kUppercaseFirst, "='" }, | |
181 }; | |
182 | |
183 static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]); | |
184 | |
/* Applies a simplified "uppercase" edit, in place, to the character that
   starts at |p|, and returns how many bytes that character is taken to
   occupy (1, 2 or 3), judged from the lead byte p[0] alone:

     p[0] <  0xc0 : 1 byte;  only ASCII 'a'..'z' is changed (bit 5 flipped),
                    every other value is left untouched.
     p[0] <  0xe0 : 2 bytes; bit 5 of p[1] is flipped unconditionally.
     otherwise    : 3 bytes; p[2] is XOR-ed with 5 (this branch also takes
                    lead bytes >= 0xf0).

   This is deliberately not real Unicode case mapping. The trailing bytes
   are modified without validation, so the caller must guarantee that p[1]
   (resp. p[2]) is writable whenever p[0] >= 0xc0 (resp. >= 0xe0). */
static int ToUpperCase(uint8_t *p) {
  if (p[0] < 0xc0) {
    if (p[0] >= 'a' && p[0] <= 'z') {
      p[0] ^= 32;
    }
    return 1;
  }
  /* An overly simplified uppercasing model for utf-8. */
  if (p[0] < 0xe0) {
    p[1] ^= 32;
    return 2;
  }
  /* An arbitrary transform for three byte characters. */
  p[2] ^= 5;
  return 3;
}

202 static BROTLI_INLINE int TransformDictionaryWord( | |
203 uint8_t* dst, const uint8_t* word, int len, int transform) { | |
204 const char* prefix = kTransforms[transform].prefix; | |
205 const char* suffix = kTransforms[transform].suffix; | |
206 const int t = kTransforms[transform].transform; | |
207 int skip = t < kOmitFirst1 ? 0 : t - (kOmitFirst1 - 1); | |
208 int idx = 0; | |
209 int i = 0; | |
210 uint8_t* uppercase; | |
211 if (skip > len) { | |
212 skip = len; | |
213 } | |
214 while (*prefix) { dst[idx++] = (uint8_t)*prefix++; } | |
215 word += skip; | |
216 len -= skip; | |
217 if (t <= kOmitLast9) { | |
218 len -= t; | |
219 } | |
220 while (i < len) { dst[idx++] = word[i++]; } | |
221 uppercase = &dst[idx - len]; | |
222 if (t == kUppercaseFirst) { | |
223 ToUpperCase(uppercase); | |
224 } else if (t == kUppercaseAll) { | |
225 while (len > 0) { | |
226 int step = ToUpperCase(uppercase); | |
227 uppercase += step; | |
228 len -= step; | |
229 } | |
230 } | |
231 while (*suffix) { dst[idx++] = (uint8_t)*suffix++; } | |
232 return idx; | |
233 } | |
234 | |
235 #if defined(__cplusplus) || defined(c_plusplus) | |
236 } /* extern "C" */ | |
237 #endif | |
238 | |
239 #endif /* BROTLI_DEC_TRANSFORM_H_ */ | |
OLD | NEW |