OLD | NEW |
1 /* ***** BEGIN LICENSE BLOCK ***** | 1 /* ***** BEGIN LICENSE BLOCK ***** |
2 * | 2 * |
3 * Copyright (C) 2015 The Android Open Source Project | 3 * Copyright (C) 2015 The Android Open Source Project |
4 * | 4 * |
5 * Licensed under the Apache License, Version 2.0 (the "License"); | 5 * Licensed under the Apache License, Version 2.0 (the "License"); |
6 * you may not use this file except in compliance with the License. | 6 * you may not use this file except in compliance with the License. |
7 * You may obtain a copy of the License at | 7 * You may obtain a copy of the License at |
8 * | 8 * |
9 * http://www.apache.org/licenses/LICENSE-2.0 | 9 * http://www.apache.org/licenses/LICENSE-2.0 |
10 * | 10 * |
(...skipping 18 matching lines...) Expand all Loading... |
29 namespace android { | 29 namespace android { |
30 | 30 |
31 // hyb file header; implementation details are in the .cpp file | 31 // hyb file header; implementation details are in the .cpp file |
32 struct Header; | 32 struct Header; |
33 | 33 |
34 class Hyphenator { | 34 class Hyphenator { |
35 public: | 35 public: |
36 // Note: this will also require a locale, for proper case folding behavior | 36 // Note: this will also require a locale, for proper case folding behavior |
37 static Hyphenator* load(const uint16_t* patternData, size_t size); | 37 static Hyphenator* load(const uint16_t* patternData, size_t size); |
38 | 38 |
39 // Compute the hyphenation of a word, storing the hyphenation in result vector
. Each | 39 // Compute the hyphenation of a word, storing the hyphenation in result |
40 // entry in the vector is a "hyphen edit" to be applied at the corresponding c
ode unit | 40 // vector. Each entry in the vector is a "hyphen edit" to be applied at the |
41 // offset in the word. Currently 0 means no hyphen and 1 means insert hyphen a
nd break, | 41 // corresponding code unit offset in the word. Currently 0 means no hyphen and |
42 // but this will be expanded to other edits for nonstandard hyphenation. | 42 // 1 means insert hyphen and break, but this will be expanded to other edits |
43 // Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to "hy-ph
en". | 43 // for nonstandard hyphenation. Example: word is "hyphen", result is [0 0 1 0 |
| 44 // 0 0], corresponding to "hy-phen". |
44 void hyphenate(std::vector<uint8_t>* result, | 45 void hyphenate(std::vector<uint8_t>* result, |
45 const uint16_t* word, | 46 const uint16_t* word, |
46 size_t len); | 47 size_t len); |
47 | 48 |
48 // pattern data is in binary format, as described in doc/hyb_file_format.md. N
ote: | 49 // pattern data is in binary format, as described in doc/hyb_file_format.md. |
49 // the caller is responsible for ensuring that the lifetime of the pattern dat
a is | 50 // Note: the caller is responsible for ensuring that the lifetime of the |
50 // at least as long as the Hyphenator object. | 51 // pattern data is at least as long as the Hyphenator object. |
51 | 52 |
52 // Note: nullptr is valid input, in which case the hyphenator only processes s
oft hyphens | 53 // Note: nullptr is valid input, in which case the hyphenator only processes |
| 54 // soft hyphens |
53 static Hyphenator* loadBinary(const uint8_t* patternData); | 55 static Hyphenator* loadBinary(const uint8_t* patternData); |
54 | 56 |
55 private: | 57 private: |
56 // apply soft hyphens only, ignoring patterns | 58 // apply soft hyphens only, ignoring patterns |
57 void hyphenateSoft(uint8_t* result, const uint16_t* word, size_t len); | 59 void hyphenateSoft(uint8_t* result, const uint16_t* word, size_t len); |
58 | 60 |
59 // try looking up word in alphabet table, return false if any code units fail
to map | 61 // Try looking up word in alphabet table, return false if any code units fail |
60 // Note that this method writes len+2 entries into alpha_codes (including star
t and stop) | 62 // to map. Note that this method writes len+2 entries into alpha_codes |
| 63 // (including start and stop). |
61 bool alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len); | 64 bool alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len); |
62 | 65 |
63 // calculate hyphenation from patterns, assuming alphabet lookup has already b
een done | 66 // calculate hyphenation from patterns, assuming alphabet lookup has already |
| 67 // been done |
64 void hyphenateFromCodes(uint8_t* result, const uint16_t* codes, size_t len); | 68 void hyphenateFromCodes(uint8_t* result, const uint16_t* codes, size_t len); |
65 | 69 |
66 // TODO: these should become parameters, as they might vary by locale, screen
size, and | 70 // TODO: these should become parameters, as they might vary by locale, screen |
67 // possibly explicit user control. | 71 // size, and possibly explicit user control. |
68 static const int MIN_PREFIX = 2; | 72 static const int MIN_PREFIX = 2; |
69 static const int MIN_SUFFIX = 3; | 73 static const int MIN_SUFFIX = 3; |
70 | 74 |
71 // See also LONGEST_HYPHENATED_WORD in LineBreaker.cpp. Here the constant is u
sed so | 75 // See also LONGEST_HYPHENATED_WORD in LineBreaker.cpp. Here the constant is |
72 // that temporary buffers can be stack-allocated without waste, which is a sli
ghtly | 76 // used so that temporary buffers can be stack-allocated without waste, which |
73 // different use case. It measures UTF-16 code units. | 77 // is a slightly different use case. It measures UTF-16 code units. |
74 static const size_t MAX_HYPHENATED_SIZE = 64; | 78 static const size_t MAX_HYPHENATED_SIZE = 64; |
75 | 79 |
76 const uint8_t* patternData; | 80 const uint8_t* patternData; |
77 | 81 |
78 // accessors for binary data | 82 // accessors for binary data |
79 const Header* getHeader() const { | 83 const Header* getHeader() const { |
80 return reinterpret_cast<const Header*>(patternData); | 84 return reinterpret_cast<const Header*>(patternData); |
81 } | 85 } |
82 }; | 86 }; |
83 | 87 |
84 } // namespace android | 88 } // namespace android |
85 | 89 |
86 #endif // MINIKIN_HYPHENATOR_H | 90 #endif // MINIKIN_HYPHENATOR_H |
OLD | NEW |