OLD | NEW |
1 /** | 1 /** |
2 ******************************************************************************* | 2 *******************************************************************************
*** |
3 * Copyright (C) 2006, International Business Machines Corporation and others. * | 3 * Copyright (C) 2006-2010, International Business Machines Corporation and othe
rs. |
4 * All Rights Reserved. * | 4 * All Rights Reserved. |
5 ******************************************************************************* | 5 *******************************************************************************
*** |
6 */ | 6 */ |
7 | 7 |
8 #ifndef DICTBE_H | 8 #ifndef DICTBE_H |
9 #define DICTBE_H | 9 #define DICTBE_H |
10 | 10 |
11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
12 #include "unicode/uniset.h" | 12 #include "unicode/uniset.h" |
13 #include "unicode/utext.h" | 13 #include "unicode/utext.h" |
14 | 14 |
15 #include "brkeng.h" | 15 #include "brkeng.h" |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
58 * | 58 * |
59 * @param breakTypes A bitmap of types handled by the engine. | 59 * @param breakTypes A bitmap of types handled by the engine. |
60 */ | 60 */ |
61 DictionaryBreakEngine( uint32_t breakTypes ); | 61 DictionaryBreakEngine( uint32_t breakTypes ); |
62 | 62 |
63 /** | 63 /** |
64 * <p>Virtual destructor.</p> | 64 * <p>Virtual destructor.</p> |
65 */ | 65 */ |
66 virtual ~DictionaryBreakEngine(); | 66 virtual ~DictionaryBreakEngine(); |
67 | 67 |
68 /** | 68 /** |
69 * <p>Indicate whether this engine handles a particular character for | 69 * <p>Indicate whether this engine handles a particular character for |
70 * a particular kind of break.</p> | 70 * a particular kind of break.</p> |
71 * | 71 * |
72 * @param c A character which begins a run that the engine might handle | 72 * @param c A character which begins a run that the engine might handle |
73 * @param breakType The type of text break which the caller wants to determine | 73 * @param breakType The type of text break which the caller wants to determine |
74 * @return TRUE if this engine handles the particular character and break | 74 * @return TRUE if this engine handles the particular character and break |
75 * type. | 75 * type. |
76 */ | 76 */ |
77 virtual UBool handles( UChar32 c, int32_t breakType ) const; | 77 virtual UBool handles( UChar32 c, int32_t breakType ) const; |
78 | 78 |
79 /** | 79 /** |
80 * <p>Find any breaks within a run in the supplied text.</p> | 80 * <p>Find any breaks within a run in the supplied text.</p> |
81 * | 81 * |
82 * @param text A UText representing the text. The | 82 * @param text A UText representing the text. The iterator is left at |
83 * iterator is left at the end of the run of characters which the engine | 83 * the end of the run of characters which the engine is capable of handling |
84 * is capable of handling. | 84 * that starts from the first (or last) character in the range. |
85 * @param startPos The start of the run within the supplied text. | 85 * @param startPos The start of the run within the supplied text. |
86 * @param endPos The end of the run within the supplied text. | 86 * @param endPos The end of the run within the supplied text. |
87 * @param reverse Whether the caller is looking for breaks in a reverse | 87 * @param reverse Whether the caller is looking for breaks in a reverse |
88 * direction. | 88 * direction. |
89 * @param breakType The type of break desired, or -1. | 89 * @param breakType The type of break desired, or -1. |
90 * @param foundBreaks An allocated C array of the breaks found, if any | 90 * @param foundBreaks An allocated C array of the breaks found, if any |
91 * @return The number of breaks found. | 91 * @return The number of breaks found. |
92 */ | 92 */ |
93 virtual int32_t findBreaks( UText *text, | 93 virtual int32_t findBreaks( UText *text, |
94 int32_t startPos, | 94 int32_t startPos, |
95 int32_t endPos, | 95 int32_t endPos, |
96 UBool reverse, | 96 UBool reverse, |
97 int32_t breakType, | 97 int32_t breakType, |
98 UStack &foundBreaks ) const; | 98 UStack &foundBreaks ) const; |
99 | 99 |
100 protected: | 100 protected: |
101 | 101 |
102 /** | 102 /** |
103 * <p>Set the character set handled by this engine.</p> | 103 * <p>Set the character set handled by this engine.</p> |
104 * | 104 * |
105 * @param set A UnicodeSet of the set of characters handled by the engine | 105 * @param set A UnicodeSet of the set of characters handled by the engine |
106 */ | 106 */ |
107 virtual void setCharacters( const UnicodeSet &set ); | 107 virtual void setCharacters( const UnicodeSet &set ); |
108 | 108 |
109 /** | 109 /** |
110 * <p>Set the break types handled by this engine.</p> | 110 * <p>Set the break types handled by this engine.</p> |
111 * | 111 * |
112 * @param breakTypes A bitmap of types handled by the engine. | 112 * @param breakTypes A bitmap of types handled by the engine. |
113 */ | 113 */ |
114 // virtual void setBreakTypes( uint32_t breakTypes ); | 114 // virtual void setBreakTypes( uint32_t breakTypes ); |
115 | 115 |
116 /** | 116 /** |
117 * <p>Divide up a range of known dictionary characters.</p> | 117 * <p>Divide up a range of known dictionary characters handled by this break en
gine.</p> |
118 * | 118 * |
119 * @param text A UText representing the text | 119 * @param text A UText representing the text |
120 * @param rangeStart The start of the range of dictionary characters | 120 * @param rangeStart The start of the range of dictionary characters |
121 * @param rangeEnd The end of the range of dictionary characters | 121 * @param rangeEnd The end of the range of dictionary characters |
122 * @param foundBreaks Output of C array of int32_t break positions, or 0 | 122 * @param foundBreaks Output of C array of int32_t break positions, or 0 |
123 * @return The number of breaks found | 123 * @return The number of breaks found |
124 */ | 124 */ |
125 virtual int32_t divideUpDictionaryRange( UText *text, | 125 virtual int32_t divideUpDictionaryRange( UText *text, |
126 int32_t rangeStart, | 126 int32_t rangeStart, |
127 int32_t rangeEnd, | 127 int32_t rangeEnd, |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
164 */ | 164 */ |
165 ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status)
; | 165 ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status)
; |
166 | 166 |
167 /** | 167 /** |
168 * <p>Virtual destructor.</p> | 168 * <p>Virtual destructor.</p> |
169 */ | 169 */ |
170 virtual ~ThaiBreakEngine(); | 170 virtual ~ThaiBreakEngine(); |
171 | 171 |
172 protected: | 172 protected: |
173 /** | 173 /** |
174 * <p>Divide up a range of known dictionary characters.</p> | 174 * <p>Divide up a range of known dictionary characters handled by this break en
gine.</p> |
175 * | 175 * |
176 * @param text A UText representing the text | 176 * @param text A UText representing the text |
177 * @param rangeStart The start of the range of dictionary characters | 177 * @param rangeStart The start of the range of dictionary characters |
178 * @param rangeEnd The end of the range of dictionary characters | 178 * @param rangeEnd The end of the range of dictionary characters |
179 * @param foundBreaks Output of C array of int32_t break positions, or 0 | 179 * @param foundBreaks Output of C array of int32_t break positions, or 0 |
180 * @return The number of breaks found | 180 * @return The number of breaks found |
181 */ | 181 */ |
182 virtual int32_t divideUpDictionaryRange( UText *text, | 182 virtual int32_t divideUpDictionaryRange( UText *text, |
183 int32_t rangeStart, | 183 int32_t rangeStart, |
184 int32_t rangeEnd, | 184 int32_t rangeEnd, |
185 UStack &foundBreaks ) const; | 185 UStack &foundBreaks ) const; |
186 | 186 |
187 }; | 187 }; |
188 | 188 |
| 189 /******************************************************************* |
| 190 * CjkBreakEngine |
| 191 */ |
| 192 |
| 193 // Indicates the language/script that the CjkBreakEngine will handle.
| 194 enum LanguageType { |
| 195 kKorean, |
| 196 kChineseJapanese |
| 197 }; |
| 198 |
| 199 /** |
| 200 * <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a |
| 201 * TrieWordDictionary with costs associated with each word and |
| 202 * Viterbi decoding to determine CJK-specific breaks.</p> |
| 203 */ |
| 204 class CjkBreakEngine : public DictionaryBreakEngine { |
| 205 protected: |
| 206 /** |
| 207 * The sets of characters handled by this engine
| 208 * @internal |
| 209 */ |
| 210 UnicodeSet fHangulWordSet; |
| 211 UnicodeSet fHanWordSet; |
| 212 UnicodeSet fKatakanaWordSet; |
| 213 UnicodeSet fHiraganaWordSet; |
| 214 |
| 215 const TrieWordDictionary *fDictionary; |
| 216 |
| 217 public: |
| 218 |
| 219 /** |
| 220 * <p>Constructs a break engine for the given dictionary and language type.</p>
| 221 * |
| 222 * @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the |
| 223 * engine is deleted. The TrieWordDictionary must contain costs for each wor
d |
| 224 * in order for the dictionary to work properly. |
| 225 */ |
| 226 CjkBreakEngine(const TrieWordDictionary *adoptDictionary, LanguageType type, U
ErrorCode &status); |
| 227 |
| 228 /** |
| 229 * <p>Virtual destructor.</p> |
| 230 */ |
| 231 virtual ~CjkBreakEngine(); |
| 232 |
| 233 protected: |
| 234 /** |
| 235 * <p>Divide up a range of known dictionary characters handled by this break
engine.</p> |
| 236 * |
| 237 * @param text A UText representing the text |
| 238 * @param rangeStart The start of the range of dictionary characters |
| 239 * @param rangeEnd The end of the range of dictionary characters |
| 240 * @param foundBreaks Output: a UStack that receives the int32_t break positions found
| 241 * @return The number of breaks found |
| 242 */ |
| 243 virtual int32_t divideUpDictionaryRange( UText *text, |
| 244 int32_t rangeStart, |
| 245 int32_t rangeEnd, |
| 246 UStack &foundBreaks ) const; |
| 247 |
| 248 }; |
189 | 249 |
190 U_NAMESPACE_END | 250 U_NAMESPACE_END |
191 | 251 |
192 /* DICTBE_H */ | 252 /* DICTBE_H */ |
193 #endif | 253 #endif |
OLD | NEW |