OLD | NEW |
(Empty) | |
| 1 /** |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2006, International Business Machines Corporation and others. * |
| 4 * All Rights Reserved. * |
| 5 ******************************************************************************* |
| 6 */ |
| 7 |
| 8 #ifndef DICTBE_H |
| 9 #define DICTBE_H |
| 10 |
| 11 #include "unicode/utypes.h" |
| 12 #include "unicode/uniset.h" |
| 13 #include "unicode/utext.h" |
| 14 |
| 15 #include "brkeng.h" |
| 16 |
| 17 U_NAMESPACE_BEGIN |
| 18 |
| 19 class TrieWordDictionary; |
| 20 |
| 21 /******************************************************************* |
| 22 * DictionaryBreakEngine |
| 23 */ |
| 24 |
| 25 /** |
| 26 * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a |
| 27 * dictionary to determine language-specific breaks.</p> |
| 28 * |
| 29 * <p>After it is constructed a DictionaryBreakEngine may be shared between |
| 30 * threads without synchronization.</p> |
| 31 */ |
| 32 class DictionaryBreakEngine : public LanguageBreakEngine { |
| 33 private: |
| 34 /** |
| 35 * The set of characters handled by this engine |
| 36 * @internal |
| 37 */ |
| 38 |
| 39 UnicodeSet fSet; |
| 40 |
| 41 /** |
| 42 * A bitmap of the break types handled by this engine |
| 43 * @internal |
| 44 */ |
| 45 |
| 46 uint32_t fTypes; |
| 47 |
| 48 /** |
| 49 * <p>Default constructor.</p> |
| 50 * Declared in the private section of the class. |
| 51 */ |
| 52 DictionaryBreakEngine(); |
| 53 |
| 54 public: |
| 55 |
| 56 /** |
| 57 * <p>Constructor setting the break types handled.</p> |
| 58 * |
| 59 * @param breakTypes A bitmap of types handled by the engine. |
| 60 */ |
| 61 DictionaryBreakEngine( uint32_t breakTypes ); |
| 62 |
| 63 /** |
| 64 * <p>Virtual destructor.</p> |
| 65 */ |
| 66 virtual ~DictionaryBreakEngine(); |
| 67 |
| 68 /** |
| 69 * <p>Indicate whether this engine handles a particular character for |
| 70 * a particular kind of break.</p> |
| 71 * |
| 72 * @param c A character which begins a run that the engine might handle |
| 73 * @param breakType The type of text break which the caller wants to determine |
| 74 * @return TRUE if this engine handles the particular character and break |
| 75 * type. |
| 76 */ |
| 77 virtual UBool handles( UChar32 c, int32_t breakType ) const; |
| 78 |
| 79 /** |
| 80 * <p>Find any breaks within a run in the supplied text.</p> |
| 81 * |
| 82 * @param text A UText representing the text. The |
| 83 * iterator is left at the end of the run of characters which the engine |
| 84 * is capable of handling. |
| 85 * @param startPos The start of the run within the supplied text. |
| 86 * @param endPos The end of the run within the supplied text. |
| 87 * @param reverse Whether the caller is looking for breaks in a reverse |
| 88 * direction. |
| 89 * @param breakType The type of break desired, or -1. |
| 90 * @param foundBreaks A UStack, passed by reference, for the breaks found, if any |
| 91 * @return The number of breaks found. |
| 92 */ |
| 93 virtual int32_t findBreaks( UText *text, |
| 94 int32_t startPos, |
| 95 int32_t endPos, |
| 96 UBool reverse, |
| 97 int32_t breakType, |
| 98 UStack &foundBreaks ) const; |
| 99 |
| 100 protected: |
| 101 |
| 102 /** |
| 103 * <p>Set the character set handled by this engine.</p> |
| 104 * |
| 105 * @param set A UnicodeSet of the set of characters handled by the engine |
| 106 */ |
| 107 virtual void setCharacters( const UnicodeSet &set ); |
| 108 |
| 109 /** |
| 110 * <p>Set the break types handled by this engine.</p> |
| 111 * NOTE: the declaration that follows is commented out, so this method is not declared by the class. |
| 112 * @param breakTypes A bitmap of types handled by the engine. |
| 113 */ |
| 114 // virtual void setBreakTypes( uint32_t breakTypes ); |
| 115 |
| 116 /** |
| 117 * <p>Divide up a range of known dictionary characters.</p> |
| 118 * Pure virtual: declared here without an implementation. |
| 119 * @param text A UText representing the text |
| 120 * @param rangeStart The start of the range of dictionary characters |
| 121 * @param rangeEnd The end of the range of dictionary characters |
| 122 * @param foundBreaks A UStack, passed by reference, for the break positions found |
| 123 * @return The number of breaks found |
| 124 */ |
| 125 virtual int32_t divideUpDictionaryRange( UText *text, |
| 126 int32_t rangeStart, |
| 127 int32_t rangeEnd, |
| 128 UStack &foundBreaks ) const = 0; |
| 129 |
| 130 }; |
| 131 |
| 132 /******************************************************************* |
| 133 * ThaiBreakEngine |
| 134 */ |
| 135 |
| 136 /** |
| 137 * <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a |
| 138 * TrieWordDictionary and heuristics to determine Thai-specific breaks.</p> |
| 139 * |
| 140 * <p>After it is constructed a ThaiBreakEngine may be shared between |
| 141 * threads without synchronization.</p> |
| 142 */ |
| 143 class ThaiBreakEngine : public DictionaryBreakEngine { |
| 144 private: |
| 145 /** |
| 146 * The character sets and the word dictionary held by this engine |
| 147 * @internal |
| 148 */ |
| 149 |
| 150 UnicodeSet fThaiWordSet; |
| 151 UnicodeSet fEndWordSet; |
| 152 UnicodeSet fBeginWordSet; |
| 153 UnicodeSet fSuffixSet; |
| 154 UnicodeSet fMarkSet; |
| 155 const TrieWordDictionary *fDictionary; |
| 156 |
| 157 public: |
| 158 |
| 159 /** |
| 160 * <p>Constructor taking the dictionary to adopt and an error code.</p> |
| 161 * |
| 162 * @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the engine is deleted. |
| 163 * @param status A UErrorCode passed by reference; presumably set on failure - confirm against the implementation. |
| 164 */ |
| 165 ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status)
; |
| 166 |
| 167 /** |
| 168 * <p>Virtual destructor.</p> |
| 169 */ |
| 170 virtual ~ThaiBreakEngine(); |
| 171 |
| 172 protected: |
| 173 /** |
| 174 * <p>Divide up a range of known dictionary characters.</p> |
| 175 * Overrides the pure virtual function declared by DictionaryBreakEngine. |
| 176 * @param text A UText representing the text |
| 177 * @param rangeStart The start of the range of dictionary characters |
| 178 * @param rangeEnd The end of the range of dictionary characters |
| 179 * @param foundBreaks A UStack, passed by reference, for the break positions found |
| 180 * @return The number of breaks found |
| 181 */ |
| 182 virtual int32_t divideUpDictionaryRange( UText *text, |
| 183 int32_t rangeStart, |
| 184 int32_t rangeEnd, |
| 185 UStack &foundBreaks ) const; |
| 186 |
| 187 }; |
| 188 |
| 189 |
| 190 U_NAMESPACE_END |
| 191 |
| 192 /* DICTBE_H */ |
| 193 #endif |
OLD | NEW |