OLD | NEW |
(Empty) | |
| 1 /** |
| 2 *******************************************************************************
***** |
| 3 * Copyright (C) 2006-2007, International Business Machines Corporation and othe
rs. * |
| 4 * All Rights Reserved.
* |
| 5 *******************************************************************************
***** |
| 6 */ |
| 7 |
| 8 #ifndef BRKENG_H |
| 9 #define BRKENG_H |
| 10 |
| 11 #include "unicode/utypes.h" |
| 12 #include "unicode/uobject.h" |
| 13 #include "unicode/utext.h" |
| 14 #include "unicode/uscript.h" |
| 15 |
| 16 U_NAMESPACE_BEGIN |
| 17 |
| 18 class UnicodeSet; |
| 19 class UStack; |
| 20 class CompactTrieDictionary; |
| 21 |
| 22 /******************************************************************* |
| 23 * LanguageBreakEngine |
| 24 */ |
| 25 |
| 26 /** |
| 27 * <p>LanguageBreakEngines implement language-specific knowledge for |
| 28 * finding text boundaries within a run of characters belonging to a |
| 29 * specific set. The boundaries will be of a specific kind, e.g. word, |
| 30 * line, etc.</p> |
| 31 * |
| 32 * <p>LanguageBreakEngines should normally be implemented so as to |
| 33 * be shared between threads without locking.</p> |
| 34 */ |
| 35 class LanguageBreakEngine : public UMemory { |
| 36 public: |
| 37 |
| 38 /** |
| 39 * <p>Default constructor.</p> |
| 40 * The class is abstract: handles() and findBreaks() are pure virtual. |
| 41 */ |
| 42 LanguageBreakEngine(); |
| 43 |
| 44 /** |
| 45 * <p>Virtual destructor.</p> |
| 46 */ |
| 47 virtual ~LanguageBreakEngine(); |
| 48 |
| 49 /** |
| 50 * <p>Indicate whether this engine handles a particular character for |
| 51 * a particular kind of break.</p> |
| 52 * |
| 53 * @param c A character which begins a run that the engine might handle |
| 54 * @param breakType The type of text break which the caller wants to determine |
| 55 * @return TRUE if this engine handles the particular character and break |
| 56 * type. |
| 57 */ |
| 58 virtual UBool handles(UChar32 c, int32_t breakType) const = 0; |
| 59 |
| 60 /** |
| 61 * <p>Find any breaks within a run in the supplied text.</p> |
| 62 * |
| 63 * @param text A UText representing the text. The |
| 64 * iterator is left at the end of the run of characters which the engine |
| 65 * is capable of handling. |
| 66 * @param startPos The start of the run within the supplied text. |
| 67 * @param endPos The end of the run within the supplied text. |
| 68 * @param reverse Whether the caller is looking for breaks in a reverse |
| 69 * direction. |
| 70 * @param breakType The type of break desired, or -1. |
| 71 * @param foundBreaks A UStack (passed by reference) of the breaks found, if any |
| 72 * @return The number of breaks found. |
| 73 */ |
| 74 virtual int32_t findBreaks( UText *text, |
| 75 int32_t startPos, |
| 76 int32_t endPos, |
| 77 UBool reverse, |
| 78 int32_t breakType, |
| 79 UStack &foundBreaks ) const = 0; |
| 80 |
| 81 }; |
| 82 |
| 83 /******************************************************************* |
| 84 * LanguageBreakFactory |
| 85 */ |
| 86 |
| 87 /** |
| 88 * <p>LanguageBreakFactory objects find and return a LanguageBreakEngine |
| 89 * that can determine breaks for characters in a specific set, if |
| 90 * such an object can be found.</p> |
| 91 * |
| 92 * <p>If a LanguageBreakFactory is to be shared between threads, |
| 93 * appropriate synchronization must be used; there is none internal |
| 94 * to the factory.</p> |
| 95 * |
| 96 * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can |
| 97 * normally be shared between threads without synchronization, unless |
| 98 * the specific subclass of LanguageBreakFactory indicates otherwise.</p> |
| 99 * |
| 100 * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine |
| 101 * it returns when it itself is deleted, unless the specific subclass of |
| 102 * LanguageBreakFactory indicates otherwise. Naturally, the factory should |
| 103 * not be deleted until the LanguageBreakEngines it has returned are no |
| 104 * longer needed.</p> |
| 105 */ |
| 106 class LanguageBreakFactory : public UMemory { |
| 107 public: |
| 108 |
| 109 /** |
| 110 * <p>Default constructor.</p> |
| 111 * The class is abstract: getEngineFor() is pure virtual. |
| 112 */ |
| 113 LanguageBreakFactory(); |
| 114 |
| 115 /** |
| 116 * <p>Virtual destructor.</p> |
| 117 */ |
| 118 virtual ~LanguageBreakFactory(); |
| 119 |
| 120 /** |
| 121 * <p>Find and return a LanguageBreakEngine that can find the desired |
| 122 * kind of break for the set of characters to which the supplied |
| 123 * character belongs. It is up to the set of available engines to |
| 124 * determine what the sets of characters are.</p> |
| 125 * |
| 126 * @param c A character that begins a run for which a LanguageBreakEngine is |
| 127 * sought. |
| 128 * @param breakType The kind of text break for which a LanguageBreakEngine is |
| 129 * sought. |
| 130 * @return A LanguageBreakEngine with the desired characteristics, or 0 if none can be found. |
| 131 */ |
| 132 virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType)
= 0; |
| 133 |
| 134 }; |
| 135 |
| 136 /******************************************************************* |
| 137 * UnhandledEngine |
| 138 */ |
| 139 |
| 140 /** |
| 141 * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that |
| 142 * handles characters that no other LanguageBreakEngine is available to |
| 143 * handle. It is told the character and the type of break; at its |
| 144 * discretion it may handle more than the specified character (e.g., |
| 145 * the entire script to which that character belongs).</p> |
| 146 * |
| 147 * <p>UnhandledEngines may not be shared between threads without |
| 148 * external synchronization.</p> |
| 149 */ |
| 150 |
| 151 class UnhandledEngine : public LanguageBreakEngine { |
| 152 private: |
| 153 |
| 154 /** |
| 155 * The sets of characters handled, for each break type (array of 4 pointers) |
| 156 * @internal |
| 157 */ |
| 158 |
| 159 UnicodeSet *fHandled[4]; |
| 160 |
| 161 public: |
| 162 |
| 163 /** |
| 164 * <p>Constructor.</p> |
| 165 * @param status Error code, passed by reference. NOTE(review): presumably set on failure - confirm in the implementation. |
| 166 */ |
| 167 UnhandledEngine(UErrorCode &status); |
| 168 |
| 169 /** |
| 170 * <p>Virtual destructor.</p> |
| 171 */ |
| 172 virtual ~UnhandledEngine(); |
| 173 |
| 174 /** |
| 175 * <p>Indicate whether this engine handles a particular character for |
| 176 * a particular kind of break.</p> |
| 177 * |
| 178 * @param c A character which begins a run that the engine might handle |
| 179 * @param breakType The type of text break which the caller wants to determine |
| 180 * @return TRUE if this engine handles the particular character and break |
| 181 * type. |
| 182 */ |
| 183 virtual UBool handles(UChar32 c, int32_t breakType) const; |
| 184 |
| 185 /** |
| 186 * <p>Find any breaks within a run in the supplied text.</p> |
| 187 * |
| 188 * @param text A UText representing the text. The |
| 189 * iterator is left at the end of the run of characters which the engine |
| 190 * is capable of handling. |
| 191 * @param startPos The start of the run within the supplied text. |
| 192 * @param endPos The end of the run within the supplied text. |
| 193 * @param reverse Whether the caller is looking for breaks in a reverse |
| 194 * direction. |
| 195 * @param breakType The type of break desired, or -1. |
| 196 * @param foundBreaks A UStack (passed by reference) of the breaks found, if any |
| 197 * @return The number of breaks found. |
| 198 */ |
| 199 virtual int32_t findBreaks( UText *text, |
| 200 int32_t startPos, |
| 201 int32_t endPos, |
| 202 UBool reverse, |
| 203 int32_t breakType, |
| 204 UStack &foundBreaks ) const; |
| 205 |
| 206 /** |
| 207 * <p>Tell the engine to handle a particular character and break type.</p> |
| 208 * |
| 209 * @param c A character which the engine should handle |
| 210 * @param breakType The type of text break for which the engine should handle c |
| 211 */ |
| 212 virtual void handleCharacter(UChar32 c, int32_t breakType); |
| 213 |
| 214 }; |
| 215 |
| 216 /******************************************************************* |
| 217 * ICULanguageBreakFactory |
| 218 */ |
| 219 |
| 220 /** |
| 221 * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for |
| 222 * ICU. It creates dictionary-based LanguageBreakEngines from dictionary |
| 223 * data in the ICU data file.</p> |
| 224 */ |
| 225 class ICULanguageBreakFactory : public LanguageBreakFactory { |
| 226 private: |
| 227 |
| 228 /** |
| 229 * The stack of break engines created by this factory |
| 230 * @internal |
| 231 */ |
| 232 |
| 233 UStack *fEngines; |
| 234 |
| 235 public: |
| 236 |
| 237 /** |
| 238 * <p>Standard constructor.</p> |
| 239 * @param status Error code, passed by reference. NOTE(review): presumably set on failure - confirm in the implementation. |
| 240 */ |
| 241 ICULanguageBreakFactory(UErrorCode &status); |
| 242 |
| 243 /** |
| 244 * <p>Virtual destructor.</p> |
| 245 */ |
| 246 virtual ~ICULanguageBreakFactory(); |
| 247 |
| 248 /** |
| 249 * <p>Find and return a LanguageBreakEngine that can find the desired |
| 250 * kind of break for the set of characters to which the supplied |
| 251 * character belongs. It is up to the set of available engines to |
| 252 * determine what the sets of characters are.</p> |
| 253 * |
| 254 * @param c A character that begins a run for which a LanguageBreakEngine is |
| 255 * sought. |
| 256 * @param breakType The kind of text break for which a LanguageBreakEngine is |
| 257 * sought. |
| 258 * @return A LanguageBreakEngine with the desired characteristics, or 0. |
| 259 */ |
| 260 virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType); |
| 261 |
| 262 protected: |
| 263 |
| 264 /** |
| 265 * <p>Create a LanguageBreakEngine for the set of characters to which |
| 266 * the supplied character belongs, for the specified break type.</p> |
| 267 * |
| 268 * @param c A character that begins a run for which a LanguageBreakEngine is |
| 269 * sought. |
| 270 * @param breakType The kind of text break for which a LanguageBreakEngine is |
| 271 * sought. |
| 272 * @return A LanguageBreakEngine with the desired characteristics, or 0. |
| 273 */ |
| 274 virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType)
; |
| 275 |
| 276 /** |
| 277 * <p>Create a CompactTrieDictionary for the specified script and break type.</
p> |
| 278 * |
| 279 * @param script An ISO 15924 script code that identifies the dictionary to be |
| 280 * created. |
| 281 * @param breakType The kind of text break for which a dictionary is |
| 282 * sought. |
| 283 * @return A CompactTrieDictionary with the desired characteristics, or 0. |
| 284 */ |
| 285 virtual const CompactTrieDictionary *loadDictionaryFor(UScriptCode script, int
32_t breakType); |
| 286 |
| 287 }; |
| 288 |
| 289 U_NAMESPACE_END |
| 290 |
| 291 /* BRKENG_H */ |
| 292 #endif |
OLD | NEW |