OLD | NEW |
1 /** | 1 /** |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2006-2014, International Business Machines Corporation | 3 * Copyright (C) 2006-2014, International Business Machines Corporation |
4 * and others. All Rights Reserved. | 4 * and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 */ | 6 */ |
7 | 7 |
8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
9 | 9 |
10 #if !UCONFIG_NO_BREAK_ITERATION | 10 #if !UCONFIG_NO_BREAK_ITERATION |
(...skipping 814 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
825 | 825 |
826 /* | 826 /* |
827 ****************************************************************** | 827 ****************************************************************** |
828 * KhmerBreakEngine | 828 * KhmerBreakEngine |
829 */ | 829 */ |
830 | 830 |
831 // How many words in a row are "good enough"? | 831 // How many words in a row are "good enough"? |
832 static const int32_t KHMER_LOOKAHEAD = 3; | 832 static const int32_t KHMER_LOOKAHEAD = 3; |
833 | 833 |
834 // Will not combine a non-word with a preceding dictionary word longer than this | 834 // Will not combine a non-word with a preceding dictionary word longer than this |
835 static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3; | 835 static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 10; |
836 | 836 |
837 // Will not combine a non-word that shares at least this much prefix with a | 837 // Will not combine a non-word that shares at least this much prefix with a |
838 // dictionary word, with a preceding word | 838 // dictionary word, with a preceding word |
839 static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3; | 839 static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 5; |
840 | 840 |
841 // Minimum word size | 841 // Minimum word size |
842 static const int32_t KHMER_MIN_WORD = 2; | 842 static const int32_t KHMER_MIN_WORD = 2; |
843 | 843 |
844 // Minimum number of characters for two words | 844 // Minimum number of characters for two words |
845 static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; | 845 static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; |
846 | 846 |
847 KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) | 847 KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) |
848 : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)), | 848 : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)), |
849 fDictionary(adoptDictionary) | 849 fDictionary(adoptDictionary) |
(...skipping 539 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1389 delete inString; | 1389 delete inString; |
1390 delete inputMap; | 1390 delete inputMap; |
1391 return numBreaks; | 1391 return numBreaks; |
1392 } | 1392 } |
1393 #endif | 1393 #endif |
1394 | 1394 |
1395 U_NAMESPACE_END | 1395 U_NAMESPACE_END |
1396 | 1396 |
1397 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 1397 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
1398 | 1398 |
OLD | NEW |