OLD | NEW |
1 // Copyright (C) 2016 and later: Unicode, Inc. and others. | 1 // Copyright (C) 2016 and later: Unicode, Inc. and others. |
2 // License & terms of use: http://www.unicode.org/copyright.html | 2 // License & terms of use: http://www.unicode.org/copyright.html |
3 /** | 3 /** |
4 ******************************************************************************* | 4 ******************************************************************************* |
5 * Copyright (C) 2006-2016, International Business Machines Corporation | 5 * Copyright (C) 2006-2016, International Business Machines Corporation |
6 * and others. All Rights Reserved. | 6 * and others. All Rights Reserved. |
7 ******************************************************************************* | 7 ******************************************************************************* |
8 */ | 8 */ |
9 | 9 |
10 #include "unicode/utypes.h" | 10 #include "unicode/utypes.h" |
(...skipping 816 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
827 | 827 |
828 /* | 828 /* |
829 ****************************************************************** | 829 ****************************************************************** |
830 * KhmerBreakEngine | 830 * KhmerBreakEngine |
831 */ | 831 */ |
832 | 832 |
833 // How many words in a row are "good enough"? | 833 // How many words in a row are "good enough"? |
834 static const int32_t KHMER_LOOKAHEAD = 3; | 834 static const int32_t KHMER_LOOKAHEAD = 3; |
835 | 835 |
836 // Will not combine a non-word with a preceding dictionary word longer than this | 836 // Will not combine a non-word with a preceding dictionary word longer than this |
837 static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3; | 837 static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 10; |
838 | 838 |
839 // Will not combine a non-word that shares at least this much prefix with a | 839 // Will not combine a non-word that shares at least this much prefix with a |
840 // dictionary word, with a preceding word | 840 // dictionary word, with a preceding word |
841 static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3; | 841 static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 5; |
842 | 842 |
843 // Minimum word size | 843 // Minimum word size |
844 static const int32_t KHMER_MIN_WORD = 2; | 844 static const int32_t KHMER_MIN_WORD = 2; |
845 | 845 |
846 // Minimum number of characters for two words | 846 // Minimum number of characters for two words |
847 static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; | 847 static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; |
848 | 848 |
849 KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) | 849 KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) |
850 : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)), | 850 : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)), |
851 fDictionary(adoptDictionary) | 851 fDictionary(adoptDictionary) |
(...skipping 544 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1396 // inString goes out of scope | 1396 // inString goes out of scope |
1397 // inputMap goes out of scope | 1397 // inputMap goes out of scope |
1398 return numBreaks; | 1398 return numBreaks; |
1399 } | 1399 } |
1400 #endif | 1400 #endif |
1401 | 1401 |
1402 U_NAMESPACE_END | 1402 U_NAMESPACE_END |
1403 | 1403 |
1404 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 1404 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
1405 | 1405 |
OLD | NEW |