| OLD | NEW |
| 1 /** | 1 /** |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * Copyright (C) 2006,2012-2013, International Business Machines Corporation * | 3 * Copyright (C) 2006-2014, International Business Machines Corporation * |
| 4 * and others. All Rights Reserved. * | 4 * and others. All Rights Reserved. * |
| 5 ******************************************************************************* | 5 ******************************************************************************* |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef DICTBE_H | 8 #ifndef DICTBE_H |
| 9 #define DICTBE_H | 9 #define DICTBE_H |
| 10 | 10 |
| 11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
| 12 #include "unicode/uniset.h" | 12 #include "unicode/uniset.h" |
| 13 #include "unicode/utext.h" | 13 #include "unicode/utext.h" |
| 14 | 14 |
| 15 #include "brkeng.h" | 15 #include "brkeng.h" |
| 16 | 16 |
| 17 U_NAMESPACE_BEGIN | 17 U_NAMESPACE_BEGIN |
| 18 | 18 |
| 19 class DictionaryMatcher; | 19 class DictionaryMatcher; |
| 20 class Normalizer2; |
| 20 | 21 |
| 21 /******************************************************************* | 22 /******************************************************************* |
| 22 * DictionaryBreakEngine | 23 * DictionaryBreakEngine |
| 23 */ | 24 */ |
| 24 | 25 |
| 25 /** | 26 /** |
| 26 * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a | 27 * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a |
| 27 * dictionary to determine language-specific breaks.</p> | 28 * dictionary to determine language-specific breaks.</p> |
| 28 * | 29 * |
| 29 * <p>After it is constructed a DictionaryBreakEngine may be shared between | 30 * <p>After it is constructed a DictionaryBreakEngine may be shared between |
| (...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 236 * @return The number of breaks found | 237 * @return The number of breaks found |
| 237 */ | 238 */ |
| 238 virtual int32_t divideUpDictionaryRange( UText *text, | 239 virtual int32_t divideUpDictionaryRange( UText *text, |
| 239 int32_t rangeStart, | 240 int32_t rangeStart, |
| 240 int32_t rangeEnd, | 241 int32_t rangeEnd, |
| 241 UStack &foundBreaks ) const; | 242 UStack &foundBreaks ) const; |
| 242 | 243 |
| 243 }; | 244 }; |
| 244 | 245 |
| 245 /******************************************************************* | 246 /******************************************************************* |
| 247 * BurmeseBreakEngine |
| 248 */ |
| 249 |
| 250 /** |
| 251 * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a |
| 252 * DictionaryMatcher and heuristics to determine Burmese-specific breaks. It derives from DictionaryBreakEngine and overrides divideUpDictionaryRange().</p> |
| 253 * |
| 254 * <p>After it is constructed a BurmeseBreakEngine may be shared between |
| 255 * threads without synchronization.</p> |
| 256 */ |
| 257 class BurmeseBreakEngine : public DictionaryBreakEngine { |
| 258 private: |
| 259 /** |
| 260 * The character sets (four UnicodeSet members) and the DictionaryMatcher pointer held by this engine. NOTE(review): the exact role of each set is defined in the implementation file, not in this header -- confirm there. |
| 261 * @internal |
| 262 */ |
| 263 |
| 264 UnicodeSet fBurmeseWordSet; |
| 265 UnicodeSet fEndWordSet; |
| 266 UnicodeSet fBeginWordSet; |
| 267 UnicodeSet fMarkSet; |
| 268 DictionaryMatcher *fDictionary; |
| 269 |
| 270 public: |
| 271 |
| 272 /** |
| 273 * <p>Constructor. Despite the historical "default constructor" wording, both arguments are required.</p> |
| 274 * |
| 275 * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the engine is deleted. |
| 276 * @param status A UErrorCode passed by reference. NOTE(review): presumably set on construction failure -- confirm in the implementation. |
| 277 */ |
| 278 BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); |
| 279 |
| 280 /** |
| 281 * <p>Virtual destructor.</p> |
| 282 */ |
| 283 virtual ~BurmeseBreakEngine(); |
| 284 |
| 285 protected: |
| 286 /** |
| 287 * <p>Divide up a range of known dictionary characters.</p> |
| 288 * |
| 289 * @param text A UText representing the text |
| 290 * @param rangeStart The start of the range of dictionary characters |
| 291 * @param rangeEnd The end of the range of dictionary characters |
| 292 * @param foundBreaks Output: a UStack (passed by reference) for the break positions found; it is a UStack, not a C array |
| 293 * @return The number of breaks found |
| 294 */ |
| 295 virtual int32_t divideUpDictionaryRange( UText *text, |
| 296 int32_t rangeStart, |
| 297 int32_t rangeEnd, |
| 298 UStack &foundBreaks ) const; |
| 299 |
| 300 }; |
| 301 |
| 302 /******************************************************************* |
| 246 * KhmerBreakEngine | 303 * KhmerBreakEngine |
| 247 */ | 304 */ |
| 248 | 305 |
| 249 /** | 306 /** |
| 250 * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a | 307 * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a |
| 251 * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> | 308 * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> |
| 252 * | 309 * |
| 253 * <p>After it is constructed a KhmerBreakEngine may be shared between | 310 * <p>After it is constructed a KhmerBreakEngine may be shared between |
| 254 * threads without synchronization.</p> | 311 * threads without synchronization.</p> |
| 255 */ | 312 */ |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 319 protected: | 376 protected: |
| 320 /** | 377 /** |
| 321 * The set of characters handled by this engine | 378 * The set of characters handled by this engine |
| 322 * @internal | 379 * @internal |
| 323 */ | 380 */ |
| 324 UnicodeSet fHangulWordSet; | 381 UnicodeSet fHangulWordSet; |
| 325 UnicodeSet fHanWordSet; | 382 UnicodeSet fHanWordSet; |
| 326 UnicodeSet fKatakanaWordSet; | 383 UnicodeSet fKatakanaWordSet; |
| 327 UnicodeSet fHiraganaWordSet; | 384 UnicodeSet fHiraganaWordSet; |
| 328 | 385 |
| 329 DictionaryMatcher *fDictionary; | 386 DictionaryMatcher *fDictionary; |
| 387 const Normalizer2 *nfkcNorm2; |
| 330 | 388 |
| 331 public: | 389 public: |
| 332 | 390 |
| 333 /** | 391 /** |
| 334 * <p>Default constructor.</p> | 392 * <p>Default constructor.</p> |
| 335 * | 393 * |
| 336 * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the | 394 * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the |
| 337 * engine is deleted. The DictionaryMatcher must contain costs for each word | 395 * engine is deleted. The DictionaryMatcher must contain costs for each word |
| 338 * in order for the dictionary to work properly. | 396 * in order for the dictionary to work properly. |
| 339 */ | 397 */ |
| (...skipping 20 matching lines...) Expand all Loading... |
| 360 UStack &foundBreaks ) const; | 418 UStack &foundBreaks ) const; |
| 361 | 419 |
| 362 }; | 420 }; |
| 363 | 421 |
| 364 #endif | 422 #endif |
| 365 | 423 |
| 366 U_NAMESPACE_END | 424 U_NAMESPACE_END |
| 367 | 425 |
| 368 /* DICTBE_H */ | 426 /* DICTBE_H */ |
| 369 #endif | 427 #endif |
| OLD | NEW |