| OLD | NEW |
| 1 // Copyright (C) 2016 and later: Unicode, Inc. and others. | 1 // Copyright (C) 2016 and later: Unicode, Inc. and others. |
| 2 // License & terms of use: http://www.unicode.org/copyright.html | 2 // License & terms of use: http://www.unicode.org/copyright.html |
| 3 /** | 3 /** |
| 4 ******************************************************************************* | 4 ******************************************************************************* |
| 5 * Copyright (C) 2006-2016, International Business Machines Corporation | 5 * Copyright (C) 2006-2016, International Business Machines Corporation |
| 6 * and others. All Rights Reserved. | 6 * and others. All Rights Reserved. |
| 7 ******************************************************************************* | 7 ******************************************************************************* |
| 8 */ | 8 */ |
| 9 | 9 |
| 10 #include "unicode/utypes.h" | 10 #include "unicode/utypes.h" |
| (...skipping 816 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 827 | 827 |
| 828 /* | 828 /* |
| 829 ****************************************************************** | 829 ****************************************************************** |
| 830 * KhmerBreakEngine | 830 * KhmerBreakEngine |
| 831 */ | 831 */ |
| 832 | 832 |
| 833 // How many words in a row are "good enough"? | 833 // How many words in a row are "good enough"? |
| 834 static const int32_t KHMER_LOOKAHEAD = 3; | 834 static const int32_t KHMER_LOOKAHEAD = 3; |
| 835 | 835 |
| 836 // Will not combine a non-word with a preceding dictionary word longer than this | 836 // Will not combine a non-word with a preceding dictionary word longer than this |
| 837 static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3; | 837 static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 10; |
| 838 | 838 |
| 839 // Will not combine a non-word that shares at least this much prefix with a | 839 // Will not combine a non-word that shares at least this much prefix with a |
| 840 // dictionary word, with a preceding word | 840 // dictionary word, with a preceding word |
| 841 static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3; | 841 static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 5; |
| 842 | 842 |
| 843 // Minimum word size | 843 // Minimum word size |
| 844 static const int32_t KHMER_MIN_WORD = 2; | 844 static const int32_t KHMER_MIN_WORD = 2; |
| 845 | 845 |
| 846 // Minimum number of characters for two words | 846 // Minimum number of characters for two words |
| 847 static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; | 847 static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; |
| 848 | 848 |
| 849 KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) | 849 KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) |
| 850 : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)), | 850 : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)), |
| 851 fDictionary(adoptDictionary) | 851 fDictionary(adoptDictionary) |
| (...skipping 544 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1396 // inString goes out of scope | 1396 // inString goes out of scope |
| 1397 // inputMap goes out of scope | 1397 // inputMap goes out of scope |
| 1398 return numBreaks; | 1398 return numBreaks; |
| 1399 } | 1399 } |
| 1400 #endif | 1400 #endif |
| 1401 | 1401 |
| 1402 U_NAMESPACE_END | 1402 U_NAMESPACE_END |
| 1403 | 1403 |
| 1404 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 1404 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
| 1405 | 1405 |
| OLD | NEW |