Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(478)

Side by Side Diff: source/i18n/csrsbcs.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/csrsbcs.h ('k') | source/i18n/currpinf.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ********************************************************************** 2 **********************************************************************
3 * Copyright (C) 2005-2013, International Business Machines 3 * Copyright (C) 2005-2015, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ********************************************************************** 5 **********************************************************************
6 */ 6 */
7 7
8 #include "unicode/utypes.h" 8 #include "unicode/utypes.h"
9 9
10 #include "cmemory.h" 10 #include "cmemory.h"
11 11
12 #if !UCONFIG_NO_CONVERSION 12 #if !UCONFIG_NO_CONVERSION
13 #include "csrsbcs.h" 13 #include "csrsbcs.h"
14 #include "csmatch.h" 14 #include "csmatch.h"
15 15
16 #define N_GRAM_SIZE 3 16 #define N_GRAM_SIZE 3
17 #define N_GRAM_MASK 0xFFFFFF 17 #define N_GRAM_MASK 0xFFFFFF
18 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) 18 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
19 19
20 U_NAMESPACE_BEGIN 20 U_NAMESPACE_BEGIN
21 21
22 NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap) 22 NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
23 : ngram(0), byteIndex(0) 23 : ngram(0), byteIndex(0)
24 { 24 {
25 ngramList = theNgramList; 25 ngramList = theNgramList;
26 charMap = theCharMap; 26 charMap = theCharMap;
27 27
28 ngramCount = hitCount = 0; 28 ngramCount = hitCount = 0;
29 } 29 }
30 30
31 NGramParser::~NGramParser()
32 {
33 }
34
31 /* 35 /*
32 * Binary search for value in table, which must have exactly 64 entries. 36 * Binary search for value in table, which must have exactly 64 entries.
33 */ 37 */
34 38
35 int32_t NGramParser::search(const int32_t *table, int32_t value) 39 int32_t NGramParser::search(const int32_t *table, int32_t value)
36 { 40 {
37 int32_t index = 0; 41 int32_t index = 0;
38 42
39 if (table[index + 32] <= value) { 43 if (table[index + 32] <= value) {
40 index += 32; 44 index += 32;
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
130 134
131 // TODO - This is a bit of a hack to take care of a case 135 // TODO - This is a bit of a hack to take care of a case
132 // where we were getting a confidence of 135... 136 // where we were getting a confidence of 135...
133 if (rawPercent > 0.33) { 137 if (rawPercent > 0.33) {
134 return 98; 138 return 98;
135 } 139 }
136 140
137 return (int32_t) (rawPercent * 300.0); 141 return (int32_t) (rawPercent * 300.0);
138 } 142 }
139 143
140 #if !UCONFIG_NO_NON_HTML5_CONVERSION 144 #if !UCONFIG_ONLY_HTML_CONVERSION
141 static const uint8_t unshapeMap_IBM420[] = { 145 static const uint8_t unshapeMap_IBM420[] = {
142 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ 146 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
143 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 147 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
144 /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 148 /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
145 /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 149 /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
146 /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 150 /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
147 /* 4- */ 0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 151 /* 4- */ 0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
148 /* 5- */ 0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 152 /* 5- */ 0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
149 /* 6- */ 0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 153 /* 6- */ 0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
150 /* 7- */ 0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 154 /* 7- */ 0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
151 /* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F, 155 /* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F,
152 /* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E, 156 /* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E,
153 /* A- */ 0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF, 157 /* A- */ 0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF,
154 /* B- */ 0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF, 158 /* B- */ 0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF,
155 /* C- */ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF, 159 /* C- */ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF,
156 /* D- */ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF, 160 /* D- */ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF,
157 /* E- */ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 161 /* E- */ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
158 /* F- */ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 162 /* F- */ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
159 }; 163 };
160 164
161 NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap):NGramParser(theNgramList, theCharMap) 165 NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap):NGramParser(theNgramList, theCharMap)
162 { 166 {
163 alef = 0x00; 167 alef = 0x00;
164 } 168 }
165 169
170 NGramParser_IBM420::~NGramParser_IBM420() {}
166 171
167 int32_t NGramParser_IBM420::isLamAlef(int32_t b) 172 int32_t NGramParser_IBM420::isLamAlef(int32_t b)
168 { 173 {
169 if(b == 0xB2 || b == 0xB3){ 174 if(b == 0xB2 || b == 0xB3){
170 return 0x47; 175 return 0x47;
171 }else if(b == 0xB4 || b == 0xB5){ 176 }else if(b == 0xB4 || b == 0xB5){
172 return 0x49; 177 return 0x49;
173 }else if(b == 0xB8 || b == 0xB9){ 178 }else if(b == 0xB8 || b == 0xB9){
174 return 0x56; 179 return 0x56;
175 }else 180 }else
(...skipping 443 matching lines...) Expand 10 before | Expand all | Expand 10 after
619 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 624 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
620 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 625 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
621 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 626 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
622 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 627 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
623 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 628 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
624 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 629 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
625 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 630 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
626 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 631 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
627 }; 632 };
628 633
629 #if !UCONFIG_NO_NON_HTML5_CONVERSION 634 #if !UCONFIG_ONLY_HTML_CONVERSION
630 static const int32_t ngrams_IBM424_he_rtl[] = { 635 static const int32_t ngrams_IBM424_he_rtl[] = {
631 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 636 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
632 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, 637 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
633 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, 638 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
634 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, 639 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
635 }; 640 };
636 641
637 static const int32_t ngrams_IBM424_he_ltr[] = { 642 static const int32_t ngrams_IBM424_he_ltr[] = {
638 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141, 643 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
639 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054, 644 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
(...skipping 512 matching lines...) Expand 10 before | Expand all | Expand 10 after
1152 return "ru"; 1157 return "ru";
1153 } 1158 }
1154 1159
1155 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const 1160 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
1156 { 1161 {
1157 int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R); 1162 int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
1158 results->set(textIn, this, confidence); 1163 results->set(textIn, this, confidence);
1159 return (confidence > 0); 1164 return (confidence > 0);
1160 } 1165 }
1161 1166
1162 #if !UCONFIG_NO_NON_HTML5_CONVERSION 1167 #if !UCONFIG_ONLY_HTML_CONVERSION
1163 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() 1168 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
1164 { 1169 {
1165 // nothing to do 1170 // nothing to do
1166 } 1171 }
1167 1172
1168 const char *CharsetRecog_IBM424_he::getLanguage() const 1173 const char *CharsetRecog_IBM424_he::getLanguage() const
1169 { 1174 {
1170 return "he"; 1175 return "he";
1171 } 1176 }
1172 1177
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
1256 { 1261 {
1257 int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar); 1262 int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
1258 results->set(textIn, this, confidence); 1263 results->set(textIn, this, confidence);
1259 return (confidence > 0); 1264 return (confidence > 0);
1260 } 1265 }
1261 #endif 1266 #endif
1262 1267
1263 U_NAMESPACE_END 1268 U_NAMESPACE_END
1264 #endif 1269 #endif
1265 1270
OLDNEW
« no previous file with comments | « source/i18n/csrsbcs.h ('k') | source/i18n/currpinf.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698