OLD | NEW |
1 /** | 1 /** |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2006,2012-2013, International Business Machines Corporation * | 3 * Copyright (C) 2006-2014, International Business Machines Corporation * |
4 * and others. All Rights Reserved. * | 4 * and others. All Rights Reserved. * |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 */ | 6 */ |
7 | 7 |
8 #ifndef DICTBE_H | 8 #ifndef DICTBE_H |
9 #define DICTBE_H | 9 #define DICTBE_H |
10 | 10 |
11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
12 #include "unicode/uniset.h" | 12 #include "unicode/uniset.h" |
13 #include "unicode/utext.h" | 13 #include "unicode/utext.h" |
14 | 14 |
15 #include "brkeng.h" | 15 #include "brkeng.h" |
16 | 16 |
17 U_NAMESPACE_BEGIN | 17 U_NAMESPACE_BEGIN |
18 | 18 |
19 class DictionaryMatcher; | 19 class DictionaryMatcher; |
| 20 class Normalizer2; |
20 | 21 |
21 /******************************************************************* | 22 /******************************************************************* |
22 * DictionaryBreakEngine | 23 * DictionaryBreakEngine |
23 */ | 24 */ |
24 | 25 |
25 /** | 26 /** |
26 * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a | 27 * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a |
27 * dictionary to determine language-specific breaks.</p> | 28 * dictionary to determine language-specific breaks.</p> |
28 * | 29 * |
29 * <p>After it is constructed a DictionaryBreakEngine may be shared between | 30 * <p>After it is constructed a DictionaryBreakEngine may be shared between |
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
236 * @return The number of breaks found | 237 * @return The number of breaks found |
237 */ | 238 */ |
238 virtual int32_t divideUpDictionaryRange( UText *text, | 239 virtual int32_t divideUpDictionaryRange( UText *text, |
239 int32_t rangeStart, | 240 int32_t rangeStart, |
240 int32_t rangeEnd, | 241 int32_t rangeEnd, |
241 UStack &foundBreaks ) const; | 242 UStack &foundBreaks ) const; |
242 | 243 |
243 }; | 244 }; |
244 | 245 |
245 /******************************************************************* | 246 /******************************************************************* |
| 247 * BurmeseBreakEngine |
| 248 */ |
| 249 |
| 250 /** |
| 251 * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a |
| 252 * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p> |
| 253 * |
| 254 * <p>After it is constructed a BurmeseBreakEngine may be shared between |
| 255 * threads without synchronization.</p> |
| 256 */ |
| 257 class BurmeseBreakEngine : public DictionaryBreakEngine { |
| 258 private: |
| 259 /** |
| 260 * Character sets and the adopted dictionary matcher held by this engine |
| 261 * @internal |
| 262 */ |
| 263 |
| 264 UnicodeSet fBurmeseWordSet; |
| 265 UnicodeSet fEndWordSet; |
| 266 UnicodeSet fBeginWordSet; |
| 267 UnicodeSet fMarkSet; |
| 268 DictionaryMatcher *fDictionary; |
| 269 |
| 270 public: |
| 271 |
| 272 /** |
| 273 * <p>Constructor; adopts the supplied dictionary.</p> |
| 274 * NOTE(review): the status parameter is undocumented; presumably an in/out error code - confirm. |
| 275 * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the |
| 276 * engine is deleted. |
| 277 */ |
| 278 BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); |
| 279 |
| 280 /** |
| 281 * <p>Virtual destructor.</p> |
| 282 */ |
| 283 virtual ~BurmeseBreakEngine(); |
| 284 |
| 285 protected: |
| 286 /** |
| 287 * <p>Divide up a range of known dictionary characters.</p> |
| 288 * |
| 289 * @param text A UText representing the text |
| 290 * @param rangeStart The start of the range of dictionary characters |
| 291 * @param rangeEnd The end of the range of dictionary characters |
| 292 * @param foundBreaks Output stack that receives the break positions |
| 293 * @return The number of breaks found |
| 294 */ |
| 295 virtual int32_t divideUpDictionaryRange( UText *text, |
| 296 int32_t rangeStart, |
| 297 int32_t rangeEnd, |
| 298 UStack &foundBreaks ) const; |
| 299 |
| 300 }; |
| 301 |
| 302 /******************************************************************* |
246 * KhmerBreakEngine | 303 * KhmerBreakEngine |
247 */ | 304 */ |
248 | 305 |
249 /** | 306 /** |
250 * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a | 307 * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a |
251 * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> | 308 * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> |
252 * | 309 * |
253 * <p>After it is constructed a KhmerBreakEngine may be shared between | 310 * <p>After it is constructed a KhmerBreakEngine may be shared between |
254 * threads without synchronization.</p> | 311 * threads without synchronization.</p> |
255 */ | 312 */ |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
319 protected: | 376 protected: |
320 /** | 377 /** |
321 * The set of characters handled by this engine | 378 * The set of characters handled by this engine |
322 * @internal | 379 * @internal |
323 */ | 380 */ |
324 UnicodeSet fHangulWordSet; | 381 UnicodeSet fHangulWordSet; |
325 UnicodeSet fHanWordSet; | 382 UnicodeSet fHanWordSet; |
326 UnicodeSet fKatakanaWordSet; | 383 UnicodeSet fKatakanaWordSet; |
327 UnicodeSet fHiraganaWordSet; | 384 UnicodeSet fHiraganaWordSet; |
328 | 385 |
329 DictionaryMatcher *fDictionary; | 386 DictionaryMatcher *fDictionary; |
| 387 const Normalizer2 *nfkcNorm2; |
330 | 388 |
331 public: | 389 public: |
332 | 390 |
333 /** | 391 /** |
334 * <p>Default constructor.</p> | 392 * <p>Default constructor.</p> |
335 * | 393 * |
336 * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the | 394 * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the |
337 * engine is deleted. The DictionaryMatcher must contain costs for each word | 395 * engine is deleted. The DictionaryMatcher must contain costs for each word |
338 * in order for the dictionary to work properly. | 396 * in order for the dictionary to work properly. |
339 */ | 397 */ |
(...skipping 20 matching lines...) Expand all Loading... |
360 UStack &foundBreaks ) const; | 418 UStack &foundBreaks ) const; |
361 | 419 |
362 }; | 420 }; |
363 | 421 |
364 #endif | 422 #endif |
365 | 423 |
366 U_NAMESPACE_END | 424 U_NAMESPACE_END |
367 | 425 |
368 /* DICTBE_H */ | 426 /* DICTBE_H */ |
369 #endif | 427 #endif |
OLD | NEW |