OLD | NEW |
1 /* ***** BEGIN LICENSE BLOCK ***** | 1 /* ***** BEGIN LICENSE BLOCK ***** |
2 * | 2 * |
3 * Copyright (C) 2015 The Android Open Source Project | 3 * Copyright (C) 2015 The Android Open Source Project |
4 * | 4 * |
5 * Licensed under the Apache License, Version 2.0 (the "License"); | 5 * Licensed under the Apache License, Version 2.0 (the "License"); |
6 * you may not use this file except in compliance with the License. | 6 * you may not use this file except in compliance with the License. |
7 * You may obtain a copy of the License at | 7 * You may obtain a copy of the License at |
8 * | 8 * |
9 * http://www.apache.org/licenses/LICENSE-2.0 | 9 * http://www.apache.org/licenses/LICENSE-2.0 |
10 * | 10 * |
(...skipping 15 matching lines...) Expand all Loading... |
26 #include <fcntl.h> | 26 #include <fcntl.h> |
27 | 27 |
28 #include "platform/text/hyphenation/HyphenatorAOSP.h" | 28 #include "platform/text/hyphenation/HyphenatorAOSP.h" |
29 | 29 |
30 using std::vector; | 30 using std::vector; |
31 | 31 |
32 namespace android { | 32 namespace android { |
33 | 33 |
// Code unit of U+00AD SOFT HYPHEN; compared against the 16-bit units of a word.
static const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
35 | 35 |
36 // The following are structs that correspond to tables inside the hyb file forma
t | 36 // The following are structs that correspond to tables inside the hyb file |
| 37 // format |
37 | 38 |
// One of the table layouts found inside a hyb file. The struct is overlaid on
// file data, so member order and widths must not change.
// NOTE(review): presumably `data` is indexed by (codepoint - min_codepoint);
// the lookup code is not visible here, so confirm before relying on it.
struct AlphabetTable0 {
  // Table format version.
  uint32_t version;
  // Bounds of the codepoint range this table describes.
  uint32_t min_codepoint;
  uint32_t max_codepoint;
  // Declared with one element, but actually a flexible array whose real size
  // is only known at runtime.
  uint8_t data[1];
};
44 | 45 |
45 struct AlphabetTable1 { | 46 struct AlphabetTable1 { |
46 uint32_t version; | 47 uint32_t version; |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
124 if (alphabetLookup(alpha_codes, word, len)) { | 125 if (alphabetLookup(alpha_codes, word, len)) { |
125 hyphenateFromCodes(result->data(), alpha_codes, paddedLen); | 126 hyphenateFromCodes(result->data(), alpha_codes, paddedLen); |
126 return; | 127 return; |
127 } | 128 } |
128 // TODO: try NFC normalization | 129 // TODO: try NFC normalization |
129 // TODO: handle non-BMP Unicode (requires remapping of offsets) | 130 // TODO: handle non-BMP Unicode (requires remapping of offsets) |
130 } | 131 } |
131 hyphenateSoft(result->data(), word, len); | 132 hyphenateSoft(result->data(), word, len); |
132 } | 133 } |
133 | 134 |
134 // If any soft hyphen is present in the word, use soft hyphens to decide hyphena
tion, | 135 // If any soft hyphen is present in the word, use soft hyphens to decide |
135 // as recommended in UAX #14 (Use of Soft Hyphen) | 136 // hyphenation, as recommended in UAX #14 (Use of Soft Hyphen) |
136 void Hyphenator::hyphenateSoft(uint8_t* result, | 137 void Hyphenator::hyphenateSoft(uint8_t* result, |
137 const uint16_t* word, | 138 const uint16_t* word, |
138 size_t len) { | 139 size_t len) { |
139 result[0] = 0; | 140 result[0] = 0; |
140 for (size_t i = 1; i < len; i++) { | 141 for (size_t i = 1; i < len; i++) { |
141 result[i] = word[i - 1] == CHAR_SOFT_HYPHEN; | 142 result[i] = word[i - 1] == CHAR_SOFT_HYPHEN; |
142 } | 143 } |
143 } | 144 } |
144 | 145 |
145 bool Hyphenator::alphabetLookup(uint16_t* alpha_codes, | 146 bool Hyphenator::alphabetLookup(uint16_t* alpha_codes, |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
184 } | 185 } |
185 alpha_codes[i + 1] = AlphabetTable1::value(entry); | 186 alpha_codes[i + 1] = AlphabetTable1::value(entry); |
186 } | 187 } |
187 alpha_codes[len + 1] = 0; | 188 alpha_codes[len + 1] = 0; |
188 return true; | 189 return true; |
189 } | 190 } |
190 return false; | 191 return false; |
191 } | 192 } |
192 | 193 |
193 /** | 194 /** |
194 * Internal implementation, after conversion to codes. All case folding and norm
alization | 195 * Internal implementation, after conversion to codes. All case folding and |
195 * has been done by now, and all characters have been found in the alphabet. | 196 * normalization has been done by now, and all characters have been found in the |
196 * Note: len here is the padded length including 0 codes at start and end. | 197 * alphabet. Note: len here is the padded length including 0 codes at start and |
| 198 * end. |
197 **/ | 199 **/ |
198 void Hyphenator::hyphenateFromCodes(uint8_t* result, | 200 void Hyphenator::hyphenateFromCodes(uint8_t* result, |
199 const uint16_t* codes, | 201 const uint16_t* codes, |
200 size_t len) { | 202 size_t len) { |
201 const Header* header = getHeader(); | 203 const Header* header = getHeader(); |
202 const Trie* trie = header->trieTable(); | 204 const Trie* trie = header->trieTable(); |
203 const Pattern* pattern = header->patternTable(); | 205 const Pattern* pattern = header->patternTable(); |
204 uint32_t char_mask = trie->char_mask; | 206 uint32_t char_mask = trie->char_mask; |
205 uint32_t link_shift = trie->link_shift; | 207 uint32_t link_shift = trie->link_shift; |
206 uint32_t link_mask = trie->link_mask; | 208 uint32_t link_mask = trie->link_mask; |
207 uint32_t pattern_shift = trie->pattern_shift; | 209 uint32_t pattern_shift = trie->pattern_shift; |
208 size_t maxOffset = len - MIN_SUFFIX - 1; | 210 size_t maxOffset = len - MIN_SUFFIX - 1; |
209 for (size_t i = 0; i < len - 1; i++) { | 211 for (size_t i = 0; i < len - 1; i++) { |
210 uint32_t node = 0; // index into Trie table | 212 uint32_t node = 0; // index into Trie table |
211 for (size_t j = i; j < len; j++) { | 213 for (size_t j = i; j < len; j++) { |
212 uint16_t c = codes[j]; | 214 uint16_t c = codes[j]; |
213 uint32_t entry = trie->data[node + c]; | 215 uint32_t entry = trie->data[node + c]; |
214 if ((entry & char_mask) == c) { | 216 if ((entry & char_mask) == c) { |
215 node = (entry & link_mask) >> link_shift; | 217 node = (entry & link_mask) >> link_shift; |
216 } else { | 218 } else { |
217 break; | 219 break; |
218 } | 220 } |
219 uint32_t pat_ix = trie->data[node] >> pattern_shift; | 221 uint32_t pat_ix = trie->data[node] >> pattern_shift; |
220 // pat_ix contains a 3-tuple of length, shift (number of trailing zeros),
and an offset | 222 // pat_ix contains a 3-tuple of length, shift (number of trailing zeros), |
221 // into the buf pool. This is the pattern for the substring (i..j) we just
matched, | 223 // and an offset into the buf pool. This is the pattern for the substring |
| 224 // (i..j) we just matched, |
222 // which we combine (via point-wise max) into the result vector. | 225 // which we combine (via point-wise max) into the result vector. |
223 if (pat_ix != 0) { | 226 if (pat_ix != 0) { |
224 uint32_t pat_entry = pattern->data[pat_ix]; | 227 uint32_t pat_entry = pattern->data[pat_ix]; |
225 int pat_len = Pattern::len(pat_entry); | 228 int pat_len = Pattern::len(pat_entry); |
226 int pat_shift = Pattern::shift(pat_entry); | 229 int pat_shift = Pattern::shift(pat_entry); |
227 const uint8_t* pat_buf = pattern->buf(pat_entry); | 230 const uint8_t* pat_buf = pattern->buf(pat_entry); |
228 int offset = j + 1 - (pat_len + pat_shift); | 231 int offset = j + 1 - (pat_len + pat_shift); |
229 // offset is the index within result that lines up with the start of pat
_buf | 232 // offset is the index within result that lines up with the start of |
| 233 // pat_buf |
230 int start = std::max(MIN_PREFIX - offset, 0); | 234 int start = std::max(MIN_PREFIX - offset, 0); |
231 int end = std::min(pat_len, (int)maxOffset - offset); | 235 int end = std::min(pat_len, (int)maxOffset - offset); |
232 for (int k = start; k < end; k++) { | 236 for (int k = start; k < end; k++) { |
233 result[offset + k] = std::max(result[offset + k], pat_buf[k]); | 237 result[offset + k] = std::max(result[offset + k], pat_buf[k]); |
234 } | 238 } |
235 } | 239 } |
236 } | 240 } |
237 } | 241 } |
238 // Since the above calculation does not modify values outside | 242 // Since the above calculation does not modify values outside |
239 // [MIN_PREFIX, len - MIN_SUFFIX], they are left as 0. | 243 // [MIN_PREFIX, len - MIN_SUFFIX], they are left as 0. |
240 for (size_t i = MIN_PREFIX; i < maxOffset; i++) { | 244 for (size_t i = MIN_PREFIX; i < maxOffset; i++) { |
241 result[i] &= 1; | 245 result[i] &= 1; |
242 } | 246 } |
243 } | 247 } |
244 | 248 |
245 } // namespace android | 249 } // namespace android |
OLD | NEW |