| OLD | NEW |
| 1 /* ***** BEGIN LICENSE BLOCK ***** | 1 /* ***** BEGIN LICENSE BLOCK ***** |
| 2 * | 2 * |
| 3 * Copyright (C) 2015 The Android Open Source Project | 3 * Copyright (C) 2015 The Android Open Source Project |
| 4 * | 4 * |
| 5 * Licensed under the Apache License, Version 2.0 (the "License"); | 5 * Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 * you may not use this file except in compliance with the License. | 6 * you may not use this file except in compliance with the License. |
| 7 * You may obtain a copy of the License at | 7 * You may obtain a copy of the License at |
| 8 * | 8 * |
| 9 * http://www.apache.org/licenses/LICENSE-2.0 | 9 * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 * | 10 * |
| (...skipping 18 matching lines...) Expand all Loading... |
| 29 namespace android { | 29 namespace android { |
| 30 | 30 |
| 31 // hyb file header; implementation details are in the .cpp file | 31 // hyb file header; implementation details are in the .cpp file |
| 32 struct Header; | 32 struct Header; |
| 33 | 33 |
| 34 class Hyphenator { | 34 class Hyphenator { |
| 35 public: | 35 public: |
| 36 // Note: this will also require a locale, for proper case folding behavior | 36 // Note: this will also require a locale, for proper case folding behavior |
| 37 static Hyphenator* load(const uint16_t* patternData, size_t size); | 37 static Hyphenator* load(const uint16_t* patternData, size_t size); |
| 38 | 38 |
| 39 // Compute the hyphenation of a word, storing the hyphenation in result vector
. Each | 39 // Compute the hyphenation of a word, storing the hyphenation in result |
| 40 // entry in the vector is a "hyphen edit" to be applied at the corresponding c
ode unit | 40 // vector. Each entry in the vector is a "hyphen edit" to be applied at the |
| 41 // offset in the word. Currently 0 means no hyphen and 1 means insert hyphen a
nd break, | 41 // corresponding code unit offset in the word. Currently 0 means no hyphen and |
| 42 // but this will be expanded to other edits for nonstandard hyphenation. | 42 // 1 means insert hyphen and break, but this will be expanded to other edits |
| 43 // Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to "hy-ph
en". | 43 // for nonstandard hyphenation. Example: word is "hyphen", result is [0 0 1 0 |
| 44 // 0 0], corresponding to "hy-phen". |
| 44 void hyphenate(std::vector<uint8_t>* result, | 45 void hyphenate(std::vector<uint8_t>* result, |
| 45 const uint16_t* word, | 46 const uint16_t* word, |
| 46 size_t len); | 47 size_t len); |
| 47 | 48 |
| 48 // pattern data is in binary format, as described in doc/hyb_file_format.md. N
ote: | 49 // pattern data is in binary format, as described in doc/hyb_file_format.md. |
| 49 // the caller is responsible for ensuring that the lifetime of the pattern dat
a is | 50 // Note: the caller is responsible for ensuring that the lifetime of the |
| 50 // at least as long as the Hyphenator object. | 51 // pattern data is at least as long as the Hyphenator object. |
| 51 | 52 |
| 52 // Note: nullptr is valid input, in which case the hyphenator only processes s
oft hyphens | 53 // Note: nullptr is valid input, in which case the hyphenator only processes |
| 54 // soft hyphens |
| 53 static Hyphenator* loadBinary(const uint8_t* patternData); | 55 static Hyphenator* loadBinary(const uint8_t* patternData); |
| 54 | 56 |
| 55 private: | 57 private: |
| 56 // apply soft hyphens only, ignoring patterns | 58 // apply soft hyphens only, ignoring patterns |
| 57 void hyphenateSoft(uint8_t* result, const uint16_t* word, size_t len); | 59 void hyphenateSoft(uint8_t* result, const uint16_t* word, size_t len); |
| 58 | 60 |
| 59 // try looking up word in alphabet table, return false if any code units fail
to map | 61 // Try looking up word in alphabet table, return false if any code units fail |
| 60 // Note that this method writes len+2 entries into alpha_codes (including star
t and stop) | 62 // to map. Note that this method writes len+2 entries into alpha_codes |
| 63 // (including start and stop). |
| 61 bool alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len); | 64 bool alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len); |
| 62 | 65 |
| 63 // calculate hyphenation from patterns, assuming alphabet lookup has already b
een done | 66 // calculate hyphenation from patterns, assuming alphabet lookup has already |
| 67 // been done |
| 64 void hyphenateFromCodes(uint8_t* result, const uint16_t* codes, size_t len); | 68 void hyphenateFromCodes(uint8_t* result, const uint16_t* codes, size_t len); |
| 65 | 69 |
| 66 // TODO: these should become parameters, as they might vary by locale, screen
size, and | 70 // TODO: these should become parameters, as they might vary by locale, screen |
| 67 // possibly explicit user control. | 71 // size, and possibly explicit user control. |
| 68 static const int MIN_PREFIX = 2; | 72 static const int MIN_PREFIX = 2; |
| 69 static const int MIN_SUFFIX = 3; | 73 static const int MIN_SUFFIX = 3; |
| 70 | 74 |
| 71 // See also LONGEST_HYPHENATED_WORD in LineBreaker.cpp. Here the constant is u
sed so | 75 // See also LONGEST_HYPHENATED_WORD in LineBreaker.cpp. Here the constant is |
| 72 // that temporary buffers can be stack-allocated without waste, which is a sli
ghtly | 76 // used so that temporary buffers can be stack-allocated without waste, which |
| 73 // different use case. It measures UTF-16 code units. | 77 // is a slightly different use case. It measures UTF-16 code units. |
| 74 static const size_t MAX_HYPHENATED_SIZE = 64; | 78 static const size_t MAX_HYPHENATED_SIZE = 64; |
| 75 | 79 |
| 76 const uint8_t* patternData; | 80 const uint8_t* patternData; |
| 77 | 81 |
| 78 // accessors for binary data | 82 // accessors for binary data |
| 79 const Header* getHeader() const { | 83 const Header* getHeader() const { |
| 80 return reinterpret_cast<const Header*>(patternData); | 84 return reinterpret_cast<const Header*>(patternData); |
| 81 } | 85 } |
| 82 }; | 86 }; |
| 83 | 87 |
| 84 } // namespace android | 88 } // namespace android |
| 85 | 89 |
| 86 #endif // MINIKIN_HYPHENATOR_H | 90 #endif // MINIKIN_HYPHENATOR_H |
| OLD | NEW |