OLD | NEW |
(Empty) | |
| 1 /* ***** BEGIN LICENSE BLOCK ***** |
| 2 * |
| 3 * Copyright (C) 2015 The Android Open Source Project |
| 4 * |
| 5 * Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 * you may not use this file except in compliance with the License. |
| 7 * You may obtain a copy of the License at |
| 8 * |
| 9 * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 * |
| 11 * Unless required by applicable law or agreed to in writing, software |
| 12 * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 * See the License for the specific language governing permissions and |
| 15 * limitations under the License. |
| 16 * |
| 17 * ***** END LICENSE BLOCK ***** */ |
| 18 |
| 19 /** |
| 20 * An implementation of Liang's hyphenation algorithm. |
| 21 */ |
| 22 |
#include <cstddef>
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>
| 25 |
| 26 #ifndef MINIKIN_HYPHENATOR_H |
| 27 #define MINIKIN_HYPHENATOR_H |
| 28 |
namespace android {

// Header of a hyb pattern file. Only forward-declared here; the layout is an
// implementation detail of the .cpp file.
struct Header;

// Hyphenator driven by binary (hyb) pattern data, implementing Liang's
// hyphenation algorithm.
class Hyphenator {
public:
    // Note: this will also require a locale, for proper case folding behavior
    static Hyphenator* load(const uint16_t* patternData, size_t size);

    // Compute the hyphenation of a word, storing the hyphenation in the result
    // vector. Each entry in the vector is a "hyphen edit" to be applied at the
    // corresponding code unit offset in the word. Currently 0 means no hyphen
    // and 1 means insert hyphen and break, but this will be expanded to other
    // edits for nonstandard hyphenation.
    // Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to
    // "hy-phen".
    void hyphenate(std::vector<uint8_t>* result, const uint16_t* word, size_t len);

    // Create a hyphenator from pattern data in binary format, as described in
    // doc/hyb_file_format.md.
    //
    // The pattern data is not copied: the caller is responsible for ensuring
    // that the lifetime of the pattern data is at least as long as the
    // Hyphenator object.
    //
    // nullptr is valid input, in which case the hyphenator only processes soft
    // hyphens.
    //
    // NOTE(review): the result is a raw pointer; ownership presumably passes to
    // the caller -- confirm against the definition in the .cpp file.
    static Hyphenator* loadBinary(const uint8_t* patternData);

private:
    // Apply soft hyphens only, ignoring patterns.
    void hyphenateSoft(uint8_t* result, const uint16_t* word, size_t len);

    // Try looking up word in the alphabet table; return false if any code unit
    // fails to map.
    // Note that this method writes len+2 entries into alpha_codes (including
    // start and stop), so the caller's buffer must be sized accordingly.
    bool alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len);

    // Calculate hyphenation from patterns, assuming alphabet lookup has already
    // been done.
    void hyphenateFromCodes(uint8_t* result, const uint16_t* codes, size_t len);

    // TODO: these should become parameters, as they might vary by locale,
    // screen size, and possibly explicit user control.
    static const int MIN_PREFIX = 2;
    static const int MIN_SUFFIX = 3;

    // See also LONGEST_HYPHENATED_WORD in LineBreaker.cpp. Here the constant is
    // used so that temporary buffers can be stack-allocated without waste,
    // which is a slightly different use case. It measures UTF-16 code units.
    static const size_t MAX_HYPHENATED_SIZE = 64;

    // Borrowed (non-owning) pointer to the hyb pattern data. Initialized to
    // nullptr so that a default-constructed Hyphenator never holds an
    // indeterminate pointer before a factory assigns the real value.
    const uint8_t* patternData = nullptr;

    // Accessor for binary data: views the start of patternData as the file
    // header. Yields a null pointer when patternData is nullptr.
    const Header* getHeader() const {
        return reinterpret_cast<const Header*>(patternData);
    }
};

}  // namespace android
| 84 |
| 85 #endif // MINIKIN_HYPHENATOR_H |
OLD | NEW |