Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(153)

Side by Side Diff: source/i18n/csrsbcs.cpp

Issue 587833004: Turn on UCONFIG_NO_NON_HTML5_CONVERTER to save 100kB (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/icu52/
Patch Set: more tests added to desc Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/i18n/csrsbcs.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ********************************************************************** 2 **********************************************************************
3 * Copyright (C) 2005-2013, International Business Machines 3 * Copyright (C) 2005-2013, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ********************************************************************** 5 **********************************************************************
6 */ 6 */
7 7
8 #include "unicode/utypes.h" 8 #include "unicode/utypes.h"
9 9
10 #include "cmemory.h" 10 #include "cmemory.h"
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
130 130
131 // TODO - This is a bit of a hack to take care of a case 131 // TODO - This is a bit of a hack to take care of a case
132 // where we were getting a confidence of 135... 132 // where we were getting a confidence of 135...
133 if (rawPercent > 0.33) { 133 if (rawPercent > 0.33) {
134 return 98; 134 return 98;
135 } 135 }
136 136
137 return (int32_t) (rawPercent * 300.0); 137 return (int32_t) (rawPercent * 300.0);
138 } 138 }
139 139
140 #if !UCONFIG_NO_NON_HTML5_CONVERSION
140 static const uint8_t unshapeMap_IBM420[] = { 141 static const uint8_t unshapeMap_IBM420[] = {
141 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ 142 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
142 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 143 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
143 /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 144 /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
144 /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 145 /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
145 /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 146 /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
146 /* 4- */ 0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 147 /* 4- */ 0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
147 /* 5- */ 0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 148 /* 5- */ 0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
148 /* 6- */ 0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 149 /* 6- */ 0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
149 /* 7- */ 0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 150 /* 7- */ 0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
225 if (!(mb == 0x20 && ignoreSpace)) { 226 if (!(mb == 0x20 && ignoreSpace)) {
226 addByte(mb); 227 addByte(mb);
227 } 228 }
228 229
229 ignoreSpace = (mb == 0x20); 230 ignoreSpace = (mb == 0x20);
230 } 231 }
231 232
232 } 233 }
233 } 234 }
234 } 235 }
236 #endif
235 237
236 CharsetRecog_sbcs::CharsetRecog_sbcs() 238 CharsetRecog_sbcs::CharsetRecog_sbcs()
237 { 239 {
238 // nothing else to do 240 // nothing else to do
239 } 241 }
240 242
241 CharsetRecog_sbcs::~CharsetRecog_sbcs() 243 CharsetRecog_sbcs::~CharsetRecog_sbcs()
242 { 244 {
243 // nothing to do 245 // nothing to do
244 } 246 }
(...skipping 372 matching lines...) Expand 10 before | Expand all | Expand 10 after
617 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 619 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
618 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 620 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
619 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 621 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
620 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 622 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
621 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 623 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
622 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 624 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
623 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 625 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
624 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 626 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
625 }; 627 };
626 628
629 #if !UCONFIG_NO_NON_HTML5_CONVERSION
627 static const int32_t ngrams_IBM424_he_rtl[] = { 630 static const int32_t ngrams_IBM424_he_rtl[] = {
628 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 631 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
629 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, 632 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
630 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, 633 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
631 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, 634 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
632 }; 635 };
633 636
634 static const int32_t ngrams_IBM424_he_ltr[] = { 637 static const int32_t ngrams_IBM424_he_ltr[] = {
635 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141, 638 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
636 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054, 639 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
684 /* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 687 /* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
685 /* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 688 /* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
686 /* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 689 /* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
687 /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 690 /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
688 /* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 691 /* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
689 /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, 692 /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF,
690 /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 693 /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
691 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, 694 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
692 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, 695 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
693 }; 696 };
697 #endif
694 698
695 //ISO-8859-1,2,5,6,7,8,9 Ngrams 699 //ISO-8859-1,2,5,6,7,8,9 Ngrams
696 700
697 struct NGramsPlusLang { 701 struct NGramsPlusLang {
698 const int32_t ngrams[64]; 702 const int32_t ngrams[64];
699 const char * lang; 703 const char * lang;
700 }; 704 };
701 705
702 static const NGramsPlusLang ngrams_8859_1[] = { 706 static const NGramsPlusLang ngrams_8859_1[] = {
703 { 707 {
(...skipping 444 matching lines...) Expand 10 before | Expand all | Expand 10 after
1148 return "ru"; 1152 return "ru";
1149 } 1153 }
1150 1154
1151 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const 1155 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
1152 { 1156 {
1153 int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R); 1157 int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
1154 results->set(textIn, this, confidence); 1158 results->set(textIn, this, confidence);
1155 return (confidence > 0); 1159 return (confidence > 0);
1156 } 1160 }
1157 1161
1162 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1158 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() 1163 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
1159 { 1164 {
1160 // nothing to do 1165 // nothing to do
1161 } 1166 }
1162 1167
1163 const char *CharsetRecog_IBM424_he::getLanguage() const 1168 const char *CharsetRecog_IBM424_he::getLanguage() const
1164 { 1169 {
1165 return "he"; 1170 return "he";
1166 } 1171 }
1167 1172
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
1246 { 1251 {
1247 return "IBM420_ltr"; 1252 return "IBM420_ltr";
1248 } 1253 }
1249 1254
1250 UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const 1255 UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const
1251 { 1256 {
1252 int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar); 1257 int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
1253 results->set(textIn, this, confidence); 1258 results->set(textIn, this, confidence);
1254 return (confidence > 0); 1259 return (confidence > 0);
1255 } 1260 }
1261 #endif
1256 1262
1257 U_NAMESPACE_END 1263 U_NAMESPACE_END
1258 #endif 1264 #endif
1259 1265
OLDNEW
« no previous file with comments | « source/i18n/csrsbcs.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698