OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2005-2013, International Business Machines | 3 * Copyright (C) 2005-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 */ | 6 */ |
7 | 7 |
8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
9 | 9 |
10 #include "cmemory.h" | 10 #include "cmemory.h" |
11 | 11 |
12 #if !UCONFIG_NO_CONVERSION | 12 #if !UCONFIG_NO_CONVERSION |
13 #include "csrsbcs.h" | 13 #include "csrsbcs.h" |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
130 | 130 |
131 // TODO - This is a bit of a hack to take care of a case | 131 // TODO - This is a bit of a hack to take care of a case |
132 // where we were getting a confidence of 135... | 132 // where we were getting a confidence of 135... |
133 if (rawPercent > 0.33) { | 133 if (rawPercent > 0.33) { |
134 return 98; | 134 return 98; |
135 } | 135 } |
136 | 136 |
137 return (int32_t) (rawPercent * 300.0); | 137 return (int32_t) (rawPercent * 300.0); |
138 } | 138 } |
139 | 139 |
140 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 140 #if !UCONFIG_ONLY_HTML_CONVERSION |
141 static const uint8_t unshapeMap_IBM420[] = { | 141 static const uint8_t unshapeMap_IBM420[] = { |
142 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -
B -C -D -E -F */ | 142 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -
B -C -D -E -F */ |
143 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x
40, 0x40, 0x40, 0x40, 0x40, | 143 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x
40, 0x40, 0x40, 0x40, 0x40, |
144 /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x
40, 0x40, 0x40, 0x40, 0x40, | 144 /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x
40, 0x40, 0x40, 0x40, 0x40, |
145 /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x
40, 0x40, 0x40, 0x40, 0x40, | 145 /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x
40, 0x40, 0x40, 0x40, 0x40, |
146 /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x
40, 0x40, 0x40, 0x40, 0x40, | 146 /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x
40, 0x40, 0x40, 0x40, 0x40, |
147 /* 4- */ 0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x
4B, 0x4C, 0x4D, 0x4E, 0x4F, | 147 /* 4- */ 0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x
4B, 0x4C, 0x4D, 0x4E, 0x4F, |
148 /* 5- */ 0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x
5B, 0x5C, 0x5D, 0x5E, 0x5F, | 148 /* 5- */ 0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x
5B, 0x5C, 0x5D, 0x5E, 0x5F, |
149 /* 6- */ 0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x
6B, 0x6C, 0x6D, 0x6E, 0x6F, | 149 /* 6- */ 0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x
6B, 0x6C, 0x6D, 0x6E, 0x6F, |
150 /* 7- */ 0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x
7B, 0x7C, 0x7D, 0x7E, 0x7F, | 150 /* 7- */ 0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x
7B, 0x7C, 0x7D, 0x7E, 0x7F, |
(...skipping 468 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
619 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, | 619 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, |
620 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, | 620 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, |
621 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, | 621 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, |
622 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | 622 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, |
623 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, | 623 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, |
624 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, | 624 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, |
625 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, | 625 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, |
626 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | 626 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, |
627 }; | 627 }; |
628 | 628 |
629 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 629 #if !UCONFIG_ONLY_HTML_CONVERSION |
630 static const int32_t ngrams_IBM424_he_rtl[] = { | 630 static const int32_t ngrams_IBM424_he_rtl[] = { |
631 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x4045
46, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x4056
41, | 631 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x4045
46, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x4056
41, |
632 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x4540
56, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x5140
45, | 632 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x4540
56, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x5140
45, |
633 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x5440
41, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x5540
56, | 633 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x5440
41, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x5540
56, |
634 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x6840
45, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x7140
69, | 634 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x6840
45, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x7140
69, |
635 }; | 635 }; |
636 | 636 |
637 static const int32_t ngrams_IBM424_he_ltr[] = { | 637 static const int32_t ngrams_IBM424_he_ltr[] = { |
638 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x4054
62, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x4071
41, | 638 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x4054
62, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x4071
41, |
639 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x4146
45, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x4540
54, | 639 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x4146
45, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x4540
54, |
(...skipping 512 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1152 return "ru"; | 1152 return "ru"; |
1153 } | 1153 } |
1154 | 1154 |
1155 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const | 1155 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const |
1156 { | 1156 { |
1157 int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R); | 1157 int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R); |
1158 results->set(textIn, this, confidence); | 1158 results->set(textIn, this, confidence); |
1159 return (confidence > 0); | 1159 return (confidence > 0); |
1160 } | 1160 } |
1161 | 1161 |
1162 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 1162 #if !UCONFIG_ONLY_HTML_CONVERSION |
1163 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() | 1163 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() |
1164 { | 1164 { |
1165 // nothing to do | 1165 // nothing to do |
1166 } | 1166 } |
1167 | 1167 |
1168 const char *CharsetRecog_IBM424_he::getLanguage() const | 1168 const char *CharsetRecog_IBM424_he::getLanguage() const |
1169 { | 1169 { |
1170 return "he"; | 1170 return "he"; |
1171 } | 1171 } |
1172 | 1172 |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1256 { | 1256 { |
1257 int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420
_ar); | 1257 int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420
_ar); |
1258 results->set(textIn, this, confidence); | 1258 results->set(textIn, this, confidence); |
1259 return (confidence > 0); | 1259 return (confidence > 0); |
1260 } | 1260 } |
1261 #endif | 1261 #endif |
1262 | 1262 |
1263 U_NAMESPACE_END | 1263 U_NAMESPACE_END |
1264 #endif | 1264 #endif |
1265 | 1265 |
OLD | NEW |