| OLD | NEW |
| (Empty) | |
| 1 /* Copyright 2010 Google Inc. All Rights Reserved. |
| 2 |
| 3 Distributed under MIT license. |
| 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| 5 */ |
| 6 |
| 7 // Transformations on dictionary words. |
| 8 |
| 9 #ifndef BROTLI_ENC_TRANSFORM_H_ |
| 10 #define BROTLI_ENC_TRANSFORM_H_ |
| 11 |
| 12 #include <string> |
| 13 |
| 14 #include "./dictionary.h" |
| 15 |
| 16 namespace brotli { |
| 17 |
// Elementary operations that can be applied to a dictionary word.
//
// The numeric values are significant: TransformWord() compares against
// kOmitLast9 / kOmitFirst1 and does arithmetic on the enumerator value to
// obtain the number of bytes to drop.
enum WordTransformType {
  // Word is copied unchanged.
  kIdentity = 0,

  // Drop the last N bytes of the word; the enumerator value equals N.
  kOmitLast1,  // 1
  kOmitLast2,  // 2
  kOmitLast3,  // 3
  kOmitLast4,  // 4
  kOmitLast5,  // 5
  kOmitLast6,  // 6
  kOmitLast7,  // 7
  kOmitLast8,  // 8
  kOmitLast9,  // 9

  // Uppercase the first character / every character of the word.
  kUppercaseFirst = 10,
  kUppercaseAll,  // 11

  // Drop the first N bytes of the word; N = value - (kOmitFirst1 - 1).
  kOmitFirst1 = 12,
  kOmitFirst2,  // 13
  kOmitFirst3,  // 14
  kOmitFirst4,  // 15
  kOmitFirst5,  // 16
  kOmitFirst6,  // 17
  kOmitFirst7,  // 18
  kOmitFirst8,  // 19
  kOmitFirst9   // 20
};
| 41 |
| 42 struct Transform { |
| 43 const char* prefix; |
| 44 WordTransformType word_transform; |
| 45 const char* suffix; |
| 46 }; |
| 47 |
| 48 static const Transform kTransforms[] = { |
| 49 { "", kIdentity, "" }, |
| 50 { "", kIdentity, " " }, |
| 51 { " ", kIdentity, " " }, |
| 52 { "", kOmitFirst1, "" }, |
| 53 { "", kUppercaseFirst, " " }, |
| 54 { "", kIdentity, " the " }, |
| 55 { " ", kIdentity, "" }, |
| 56 { "s ", kIdentity, " " }, |
| 57 { "", kIdentity, " of " }, |
| 58 { "", kUppercaseFirst, "" }, |
| 59 { "", kIdentity, " and " }, |
| 60 { "", kOmitFirst2, "" }, |
| 61 { "", kOmitLast1, "" }, |
| 62 { ", ", kIdentity, " " }, |
| 63 { "", kIdentity, ", " }, |
| 64 { " ", kUppercaseFirst, " " }, |
| 65 { "", kIdentity, " in " }, |
| 66 { "", kIdentity, " to " }, |
| 67 { "e ", kIdentity, " " }, |
| 68 { "", kIdentity, "\"" }, |
| 69 { "", kIdentity, "." }, |
| 70 { "", kIdentity, "\">" }, |
| 71 { "", kIdentity, "\n" }, |
| 72 { "", kOmitLast3, "" }, |
| 73 { "", kIdentity, "]" }, |
| 74 { "", kIdentity, " for " }, |
| 75 { "", kOmitFirst3, "" }, |
| 76 { "", kOmitLast2, "" }, |
| 77 { "", kIdentity, " a " }, |
| 78 { "", kIdentity, " that " }, |
| 79 { " ", kUppercaseFirst, "" }, |
| 80 { "", kIdentity, ". " }, |
| 81 { ".", kIdentity, "" }, |
| 82 { " ", kIdentity, ", " }, |
| 83 { "", kOmitFirst4, "" }, |
| 84 { "", kIdentity, " with " }, |
| 85 { "", kIdentity, "'" }, |
| 86 { "", kIdentity, " from " }, |
| 87 { "", kIdentity, " by " }, |
| 88 { "", kOmitFirst5, "" }, |
| 89 { "", kOmitFirst6, "" }, |
| 90 { " the ", kIdentity, "" }, |
| 91 { "", kOmitLast4, "" }, |
| 92 { "", kIdentity, ". The " }, |
| 93 { "", kUppercaseAll, "" }, |
| 94 { "", kIdentity, " on " }, |
| 95 { "", kIdentity, " as " }, |
| 96 { "", kIdentity, " is " }, |
| 97 { "", kOmitLast7, "" }, |
| 98 { "", kOmitLast1, "ing " }, |
| 99 { "", kIdentity, "\n\t" }, |
| 100 { "", kIdentity, ":" }, |
| 101 { " ", kIdentity, ". " }, |
| 102 { "", kIdentity, "ed " }, |
| 103 { "", kOmitFirst9, "" }, |
| 104 { "", kOmitFirst7, "" }, |
| 105 { "", kOmitLast6, "" }, |
| 106 { "", kIdentity, "(" }, |
| 107 { "", kUppercaseFirst, ", " }, |
| 108 { "", kOmitLast8, "" }, |
| 109 { "", kIdentity, " at " }, |
| 110 { "", kIdentity, "ly " }, |
| 111 { " the ", kIdentity, " of " }, |
| 112 { "", kOmitLast5, "" }, |
| 113 { "", kOmitLast9, "" }, |
| 114 { " ", kUppercaseFirst, ", " }, |
| 115 { "", kUppercaseFirst, "\"" }, |
| 116 { ".", kIdentity, "(" }, |
| 117 { "", kUppercaseAll, " " }, |
| 118 { "", kUppercaseFirst, "\">" }, |
| 119 { "", kIdentity, "=\"" }, |
| 120 { " ", kIdentity, "." }, |
| 121 { ".com/", kIdentity, "" }, |
| 122 { " the ", kIdentity, " of the " }, |
| 123 { "", kUppercaseFirst, "'" }, |
| 124 { "", kIdentity, ". This " }, |
| 125 { "", kIdentity, "," }, |
| 126 { ".", kIdentity, " " }, |
| 127 { "", kUppercaseFirst, "(" }, |
| 128 { "", kUppercaseFirst, "." }, |
| 129 { "", kIdentity, " not " }, |
| 130 { " ", kIdentity, "=\"" }, |
| 131 { "", kIdentity, "er " }, |
| 132 { " ", kUppercaseAll, " " }, |
| 133 { "", kIdentity, "al " }, |
| 134 { " ", kUppercaseAll, "" }, |
| 135 { "", kIdentity, "='" }, |
| 136 { "", kUppercaseAll, "\"" }, |
| 137 { "", kUppercaseFirst, ". " }, |
| 138 { " ", kIdentity, "(" }, |
| 139 { "", kIdentity, "ful " }, |
| 140 { " ", kUppercaseFirst, ". " }, |
| 141 { "", kIdentity, "ive " }, |
| 142 { "", kIdentity, "less " }, |
| 143 { "", kUppercaseAll, "'" }, |
| 144 { "", kIdentity, "est " }, |
| 145 { " ", kUppercaseFirst, "." }, |
| 146 { "", kUppercaseAll, "\">" }, |
| 147 { " ", kIdentity, "='" }, |
| 148 { "", kUppercaseFirst, "," }, |
| 149 { "", kIdentity, "ize " }, |
| 150 { "", kUppercaseAll, "." }, |
| 151 { "\xc2\xa0", kIdentity, "" }, |
| 152 { " ", kIdentity, "," }, |
| 153 { "", kUppercaseFirst, "=\"" }, |
| 154 { "", kUppercaseAll, "=\"" }, |
| 155 { "", kIdentity, "ous " }, |
| 156 { "", kUppercaseAll, ", " }, |
| 157 { "", kUppercaseFirst, "='" }, |
| 158 { " ", kUppercaseFirst, "," }, |
| 159 { " ", kUppercaseAll, "=\"" }, |
| 160 { " ", kUppercaseAll, ", " }, |
| 161 { "", kUppercaseAll, "," }, |
| 162 { "", kUppercaseAll, "(" }, |
| 163 { "", kUppercaseAll, ". " }, |
| 164 { " ", kUppercaseAll, "." }, |
| 165 { "", kUppercaseAll, "='" }, |
| 166 { " ", kUppercaseAll, ". " }, |
| 167 { " ", kUppercaseFirst, "=\"" }, |
| 168 { " ", kUppercaseAll, "='" }, |
| 169 { " ", kUppercaseFirst, "='" }, |
| 170 }; |
| 171 |
| 172 static const size_t kNumTransforms = |
| 173 sizeof(kTransforms) / sizeof(kTransforms[0]); |
| 174 |
// kOmitLastNTransforms[N] is the index in kTransforms of the entry that has
// an empty prefix, an empty suffix and drops the last N bytes of the word
// (N == 0 is the plain identity transform).
static const size_t kOmitLastNTransforms[10] = {
  0,   // kIdentity
  12,  // kOmitLast1
  27,  // kOmitLast2
  23,  // kOmitLast3
  42,  // kOmitLast4
  63,  // kOmitLast5
  56,  // kOmitLast6
  48,  // kOmitLast7
  59,  // kOmitLast8
  64   // kOmitLast9
};
| 178 |
// Uppercases, in place, the single character that starts at p[0] and reports
// how many bytes that character occupied.
//
//   p    - buffer to modify; at least `len` bytes must be accessible.
//   len  - number of bytes available starting at p.
//
// Returns the number of bytes consumed (0 when len is 0, otherwise 1..3).
// The result never exceeds `len`, so a caller can advance through a buffer
// with it without overrunning the end.
//
// This is a deliberately simple byte-level model, not real Unicode case
// mapping:
//   * lead byte below 0xC0 (or only one byte left): 'a'..'z' become
//     'A'..'Z', anything else is left alone; 1 byte consumed.
//   * lead byte below 0xE0: bit 0x20 of the second byte is flipped;
//     2 bytes consumed.
//   * any other lead byte: the third byte is XOR-ed with 5 and 3 bytes are
//     consumed; if only two bytes are left they are skipped unchanged.
static size_t ToUpperCase(uint8_t *p, size_t len) {
  // Nothing to look at: do not touch p[0] and do not claim to have consumed
  // a byte that was never offered.
  if (len == 0) {
    return 0;
  }
  if (len == 1 || p[0] < 0xc0) {
    if (p[0] >= 'a' && p[0] <= 'z') {
      p[0] ^= 32;
    }
    return 1;
  }
  // From here on len >= 2, so p[1] is inside the buffer.
  if (p[0] < 0xe0) {
    p[1] ^= 32;
    return 2;
  }
  // Three-byte form, but the buffer ends early: skip what is there.
  if (len == 2) {
    return 2;
  }
  p[2] ^= 5;
  return 3;
}
| 196 |
| 197 inline std::string TransformWord( |
| 198 WordTransformType transform_type, const uint8_t* word, size_t len) { |
| 199 if (transform_type <= kOmitLast9) { |
| 200 if (len <= static_cast<size_t>(transform_type)) { |
| 201 return std::string(); |
| 202 } |
| 203 return std::string(word, word + len - transform_type); |
| 204 } |
| 205 |
| 206 if (transform_type >= kOmitFirst1) { |
| 207 const size_t skip = transform_type - (kOmitFirst1 - 1); |
| 208 if (len <= skip) { |
| 209 return std::string(); |
| 210 } |
| 211 return std::string(word + skip, word + len); |
| 212 } |
| 213 |
| 214 std::string ret = std::string(word, word + len); |
| 215 uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[0]); |
| 216 if (transform_type == kUppercaseFirst) { |
| 217 ToUpperCase(uppercase, len); |
| 218 } else if (transform_type == kUppercaseAll) { |
| 219 size_t position = 0; |
| 220 while (position < len) { |
| 221 size_t step = ToUpperCase(uppercase, len - position); |
| 222 uppercase += step; |
| 223 position += step; |
| 224 } |
| 225 } |
| 226 return ret; |
| 227 } |
| 228 |
| 229 inline std::string ApplyTransform( |
| 230 const Transform& t, const uint8_t* word, size_t len) { |
| 231 return std::string(t.prefix) + |
| 232 TransformWord(t.word_transform, word, len) + std::string(t.suffix); |
| 233 } |
| 234 |
| 235 inline std::string GetTransformedDictionaryWord(size_t len_code, |
| 236 size_t word_id) { |
| 237 size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code]; |
| 238 size_t offset = kBrotliDictionaryOffsetsByLength[len_code]; |
| 239 size_t t = word_id / num_words; |
| 240 size_t word_idx = word_id % num_words; |
| 241 offset += len_code * word_idx; |
| 242 const uint8_t* word = &kBrotliDictionary[offset]; |
| 243 return ApplyTransform(kTransforms[t], word, len_code); |
| 244 } |
| 245 |
| 246 } // namespace brotli |
| 247 |
| 248 #endif // BROTLI_ENC_TRANSFORM_H_ |
| OLD | NEW |