| OLD | NEW |
| 1 /** | 1 /** |
| 2 ******************************************************************************* | 2 *******************************************************************************
*** |
| 3 * Copyright (C) 2006, International Business Machines Corporation and others. * | 3 * Copyright (C) 2006-2010, International Business Machines Corporation and others. |
| 4 * All Rights Reserved. * | 4 * All Rights Reserved. |
| 5 ******************************************************************************* | 5 *******************************************************************************
*** |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef DICTBE_H | 8 #ifndef DICTBE_H |
| 9 #define DICTBE_H | 9 #define DICTBE_H |
| 10 | 10 |
| 11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
| 12 #include "unicode/uniset.h" | 12 #include "unicode/uniset.h" |
| 13 #include "unicode/utext.h" | 13 #include "unicode/utext.h" |
| 14 | 14 |
| 15 #include "brkeng.h" | 15 #include "brkeng.h" |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 58 * | 58 * |
| 59 * @param breakTypes A bitmap of types handled by the engine. | 59 * @param breakTypes A bitmap of types handled by the engine. |
| 60 */ | 60 */ |
| 61 DictionaryBreakEngine( uint32_t breakTypes ); | 61 DictionaryBreakEngine( uint32_t breakTypes ); |
| 62 | 62 |
| 63 /** | 63 /** |
| 64 * <p>Virtual destructor.</p> | 64 * <p>Virtual destructor.</p> |
| 65 */ | 65 */ |
| 66 virtual ~DictionaryBreakEngine(); | 66 virtual ~DictionaryBreakEngine(); |
| 67 | 67 |
| 68 /** | 68 /** |
| 69 * <p>Indicate whether this engine handles a particular character for | 69 * <p>Indicate whether this engine handles a particular character for |
| 70 * a particular kind of break.</p> | 70 * a particular kind of break.</p> |
| 71 * | 71 * |
| 72 * @param c A character which begins a run that the engine might handle | 72 * @param c A character which begins a run that the engine might handle |
| 73 * @param breakType The type of text break which the caller wants to determine | 73 * @param breakType The type of text break which the caller wants to determine |
| 74 * @return TRUE if this engine handles the particular character and break | 74 * @return TRUE if this engine handles the particular character and break |
| 75 * type. | 75 * type. |
| 76 */ | 76 */ |
| 77 virtual UBool handles( UChar32 c, int32_t breakType ) const; | 77 virtual UBool handles( UChar32 c, int32_t breakType ) const; |
| 78 | 78 |
| 79 /** | 79 /** |
| 80 * <p>Find any breaks within a run in the supplied text.</p> | 80 * <p>Find any breaks within a run in the supplied text.</p> |
| 81 * | 81 * |
| 82 * @param text A UText representing the text. The | 82 * @param text A UText representing the text. The iterator is left at |
| 83 * iterator is left at the end of the run of characters which the engine | 83 * the end of the run of characters which the engine is capable of handling |
| 84 * is capable of handling. | 84 * that starts from the first (or last) character in the range. |
| 85 * @param startPos The start of the run within the supplied text. | 85 * @param startPos The start of the run within the supplied text. |
| 86 * @param endPos The end of the run within the supplied text. | 86 * @param endPos The end of the run within the supplied text. |
| 87 * @param reverse Whether the caller is looking for breaks in a reverse | 87 * @param reverse Whether the caller is looking for breaks in a reverse |
| 88 * direction. | 88 * direction. |
| 89 * @param breakType The type of break desired, or -1. | 89 * @param breakType The type of break desired, or -1. |
| 90 * @param foundBreaks An allocated C array of the breaks found, if any | 90 * @param foundBreaks An allocated C array of the breaks found, if any |
| 91 * @return The number of breaks found. | 91 * @return The number of breaks found. |
| 92 */ | 92 */ |
| 93 virtual int32_t findBreaks( UText *text, | 93 virtual int32_t findBreaks( UText *text, |
| 94 int32_t startPos, | 94 int32_t startPos, |
| 95 int32_t endPos, | 95 int32_t endPos, |
| 96 UBool reverse, | 96 UBool reverse, |
| 97 int32_t breakType, | 97 int32_t breakType, |
| 98 UStack &foundBreaks ) const; | 98 UStack &foundBreaks ) const; |
| 99 | 99 |
| 100 protected: | 100 protected: |
| 101 | 101 |
| 102 /** | 102 /** |
| 103 * <p>Set the character set handled by this engine.</p> | 103 * <p>Set the character set handled by this engine.</p> |
| 104 * | 104 * |
| 105 * @param set A UnicodeSet of the set of characters handled by the engine | 105 * @param set A UnicodeSet of the set of characters handled by the engine |
| 106 */ | 106 */ |
| 107 virtual void setCharacters( const UnicodeSet &set ); | 107 virtual void setCharacters( const UnicodeSet &set ); |
| 108 | 108 |
| 109 /** | 109 /** |
| 110 * <p>Set the break types handled by this engine.</p> | 110 * <p>Set the break types handled by this engine.</p> |
| 111 * | 111 * |
| 112 * @param breakTypes A bitmap of types handled by the engine. | 112 * @param breakTypes A bitmap of types handled by the engine. |
| 113 */ | 113 */ |
| 114 // virtual void setBreakTypes( uint32_t breakTypes ); | 114 // virtual void setBreakTypes( uint32_t breakTypes ); |
| 115 | 115 |
| 116 /** | 116 /** |
| 117 * <p>Divide up a range of known dictionary characters.</p> | 117 * <p>Divide up a range of known dictionary characters handled by this break engine.</p> |
| 118 * | 118 * |
| 119 * @param text A UText representing the text | 119 * @param text A UText representing the text |
| 120 * @param rangeStart The start of the range of dictionary characters | 120 * @param rangeStart The start of the range of dictionary characters |
| 121 * @param rangeEnd The end of the range of dictionary characters | 121 * @param rangeEnd The end of the range of dictionary characters |
| 122 * @param foundBreaks Output of C array of int32_t break positions, or 0 | 122 * @param foundBreaks Output of C array of int32_t break positions, or 0 |
| 123 * @return The number of breaks found | 123 * @return The number of breaks found |
| 124 */ | 124 */ |
| 125 virtual int32_t divideUpDictionaryRange( UText *text, | 125 virtual int32_t divideUpDictionaryRange( UText *text, |
| 126 int32_t rangeStart, | 126 int32_t rangeStart, |
| 127 int32_t rangeEnd, | 127 int32_t rangeEnd, |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 164 */ | 164 */ |
| 165 ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status)
; | 165 ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status)
; |
| 166 | 166 |
| 167 /** | 167 /** |
| 168 * <p>Virtual destructor.</p> | 168 * <p>Virtual destructor.</p> |
| 169 */ | 169 */ |
| 170 virtual ~ThaiBreakEngine(); | 170 virtual ~ThaiBreakEngine(); |
| 171 | 171 |
| 172 protected: | 172 protected: |
| 173 /** | 173 /** |
| 174 * <p>Divide up a range of known dictionary characters.</p> | 174 * <p>Divide up a range of known dictionary characters handled by this break engine.</p> |
| 175 * | 175 * |
| 176 * @param text A UText representing the text | 176 * @param text A UText representing the text |
| 177 * @param rangeStart The start of the range of dictionary characters | 177 * @param rangeStart The start of the range of dictionary characters |
| 178 * @param rangeEnd The end of the range of dictionary characters | 178 * @param rangeEnd The end of the range of dictionary characters |
| 179 * @param foundBreaks Output of C array of int32_t break positions, or 0 | 179 * @param foundBreaks Output of C array of int32_t break positions, or 0 |
| 180 * @return The number of breaks found | 180 * @return The number of breaks found |
| 181 */ | 181 */ |
| 182 virtual int32_t divideUpDictionaryRange( UText *text, | 182 virtual int32_t divideUpDictionaryRange( UText *text, |
| 183 int32_t rangeStart, | 183 int32_t rangeStart, |
| 184 int32_t rangeEnd, | 184 int32_t rangeEnd, |
| 185 UStack &foundBreaks ) const; | 185 UStack &foundBreaks ) const; |
| 186 | 186 |
| 187 }; | 187 }; |
| 188 | 188 |
| 189 /******************************************************************* |
| 190 * CjkBreakEngine |
| 191 */ |
| 192 |
| 193 // Indicates the language/script that the CjkBreakEngine will handle. |
| 194 enum LanguageType { |
| 195 kKorean, |
| 196 kChineseJapanese |
| 197 }; |
| 198 |
| 199 /** |
| 200 * <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a |
| 201 * TrieWordDictionary with costs associated with each word and |
| 202 * Viterbi decoding to determine CJK-specific breaks.</p> |
| 203 */ |
| 204 class CjkBreakEngine : public DictionaryBreakEngine { |
| 205 protected: |
| 206 /** |
| 207 * The sets of characters handled by this engine |
| 208 * @internal |
| 209 */ |
| 210 UnicodeSet fHangulWordSet; |
| 211 UnicodeSet fHanWordSet; |
| 212 UnicodeSet fKatakanaWordSet; |
| 213 UnicodeSet fHiraganaWordSet; |
| 214 |
| 215 const TrieWordDictionary *fDictionary; |
| 216 |
| 217 public: |
| 218 |
| 219 /** |
| 220 * <p>Constructor.</p> |
| 221 * |
| 222 * @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the |
| 223 * engine is deleted. The TrieWordDictionary must contain costs for each word |
| 224 * in order for the dictionary to work properly. |
| 225 */ |
| 226 CjkBreakEngine(const TrieWordDictionary *adoptDictionary, LanguageType type, U
ErrorCode &status); |
| 227 |
| 228 /** |
| 229 * <p>Virtual destructor.</p> |
| 230 */ |
| 231 virtual ~CjkBreakEngine(); |
| 232 |
| 233 protected: |
| 234 /** |
| 235 * <p>Divide up a range of known dictionary characters handled by this break engine.</p> |
| 236 * |
| 237 * @param text A UText representing the text |
| 238 * @param rangeStart The start of the range of dictionary characters |
| 239 * @param rangeEnd The end of the range of dictionary characters |
| 240 * @param foundBreaks Output UStack that receives the break positions found |
| 241 * @return The number of breaks found |
| 242 */ |
| 243 virtual int32_t divideUpDictionaryRange( UText *text, |
| 244 int32_t rangeStart, |
| 245 int32_t rangeEnd, |
| 246 UStack &foundBreaks ) const; |
| 247 |
| 248 }; |
| 189 | 249 |
| 190 U_NAMESPACE_END | 250 U_NAMESPACE_END |
| 191 | 251 |
| 192 /* DICTBE_H */ | 252 /* DICTBE_H */ |
| 193 #endif | 253 #endif |
| OLD | NEW |