Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(206)

Side by Side Diff: Source/wtf/text/TextCodecICU.cpp

Issue 1167523003: Define a variable to distinguish system_icu from bundled_icu in Blink (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: update Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
64 // We register Hebrew with logical ordering using a separate name. 64 // We register Hebrew with logical ordering using a separate name.
65 // Otherwise, this would share the same canonical name as the 65 // Otherwise, this would share the same canonical name as the
66 // visual ordering case, and then TextEncoding could not tell them 66 // visual ordering case, and then TextEncoding could not tell them
67 // apart; ICU treats these names as synonyms. 67 // apart; ICU treats these names as synonyms.
68 registrar("ISO-8859-8-I", "ISO-8859-8-I"); 68 registrar("ISO-8859-8-I", "ISO-8859-8-I");
69 69
70 int32_t numEncodings = ucnv_countAvailable(); 70 int32_t numEncodings = ucnv_countAvailable();
71 for (int32_t i = 0; i < numEncodings; ++i) { 71 for (int32_t i = 0; i < numEncodings; ++i) {
72 const char* name = ucnv_getAvailableName(i); 72 const char* name = ucnv_getAvailableName(i);
73 UErrorCode error = U_ZERO_ERROR; 73 UErrorCode error = U_ZERO_ERROR;
74 // Try MIME before trying IANA to pick up commonly used names like 74 #if !defined(USING_SYSTEM_ICU)
75 // 'EUC-JP' instead of horrendously long names like 75 const char* primaryStandard = "HTML";
76 // 'Extended_UNIX_Code_Packed_Format_for_Japanese'. 76 const char* secondaryStandard = "MIME";
77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); 77 #else
78 if (!U_SUCCESS(error) || !standardName) { 78 const char* primaryStandard = "MIME";
79 const char* secondaryStandard = "IANA";
80 #endif
81 const char* standardName = ucnv_getStandardName(name, primaryStandard, &error);
82 if (U_FAILURE(error) || !standardName) {
79 error = U_ZERO_ERROR; 83 error = U_ZERO_ERROR;
80 // Try IANA to pick up 'windows-12xx' and other names 84 // Try IANA to pick up 'windows-12xx' and other names
81 // which are not preferred MIME names but are widely used. 85 // which are not preferred MIME names but are widely used.
82 standardName = ucnv_getStandardName(name, "IANA", &error); 86 standardName = ucnv_getStandardName(name, secondaryStandard, &error);
83 if (!U_SUCCESS(error) || !standardName) 87 if (U_FAILURE(error) || !standardName)
84 continue; 88 continue;
85 } 89 }
86 90
87 // A number of these aliases are handled in Chrome's copy of ICU, but 91 // A number of these aliases are handled in Chrome's copy of ICU, but
88 // Chromium can be compiled with the system ICU. 92 // Chromium can be compiled with the system ICU.
89 93
90 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers. 94 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
91 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding 95 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
92 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too. 96 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
97 #if defined(USING_SYSTEM_ICU)
93 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80")) 98 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80"))
94 standardName = "GBK"; 99 standardName = "GBK";
95 // Similarly, EUC-KR encodings all map to an extended version, but 100 // Similarly, EUC-KR encodings all map to an extended version, but
96 // per HTML5, the canonical name still should be EUC-KR. 101 // per HTML5, the canonical name still should be EUC-KR.
97 else if (!strcmp(standardName, "EUC-KR") || !strcmp(standardName, "KSC_5601") || !strcmp(standardName, "cp1363")) 102 else if (!strcmp(standardName, "EUC-KR") || !strcmp(standardName, "KSC_5601") || !strcmp(standardName, "cp1363"))
98 standardName = "EUC-KR"; 103 standardName = "EUC-KR";
99 // And so on. 104 // And so on.
100 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is returned in different case by ICU 3.2 and 3.6. 105 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is returned in different case by ICU 3.2 and 3.6.
101 standardName = "windows-1254"; 106 standardName = "windows-1254";
102 else if (!strcmp(standardName, "TIS-620")) 107 else if (!strcmp(standardName, "TIS-620"))
103 standardName = "windows-874"; 108 standardName = "windows-874";
109 #endif
104 110
105 registrar(standardName, standardName); 111 registrar(standardName, standardName);
106 112
107 uint16_t numAliases = ucnv_countAliases(name, &error); 113 uint16_t numAliases = ucnv_countAliases(name, &error);
108 ASSERT(U_SUCCESS(error)); 114 ASSERT(U_SUCCESS(error));
109 if (U_SUCCESS(error)) 115 if (U_SUCCESS(error))
110 for (uint16_t j = 0; j < numAliases; ++j) { 116 for (uint16_t j = 0; j < numAliases; ++j) {
111 error = U_ZERO_ERROR; 117 error = U_ZERO_ERROR;
112 const char* alias = ucnv_getAlias(name, j, &error); 118 const char* alias = ucnv_getAlias(name, j, &error);
113 ASSERT(U_SUCCESS(error)); 119 ASSERT(U_SUCCESS(error));
114 if (U_SUCCESS(error) && alias != standardName) 120 if (U_SUCCESS(error) && alias != standardName)
115 registrar(alias, standardName); 121 registrar(alias, standardName);
116 } 122 }
117 } 123 }
118 124
125 // These two entries have to be added here because ICU's converter table
126 // cannot have both ISO-8859-8-I and ISO-8859-8.
127 registrar("csISO88598I", "ISO-8859-8-I");
128 registrar("logical", "ISO-8859-8-I");
129
130 #if defined(USING_SYSTEM_ICU)
119 // Additional alias for MacCyrillic not present in ICU. 131 // Additional alias for MacCyrillic not present in ICU.
120 registrar("maccyrillic", "x-mac-cyrillic"); 132 registrar("maccyrillic", "x-mac-cyrillic");
121 133
122 // Additional aliases that historically were present in the encoding 134 // Additional aliases that historically were present in the encoding
123 // table in WebKit on Macintosh that don't seem to be present in ICU. 135 // table in WebKit on Macintosh that don't seem to be present in ICU.
124 // Perhaps we can prove these are not used on the web and remove them. 136 // Perhaps we can prove these are not used on the web and remove them.
125 // Or perhaps we can get them added to ICU. 137 // Or perhaps we can get them added to ICU.
126 registrar("x-mac-roman", "macintosh"); 138 registrar("x-mac-roman", "macintosh");
127 registrar("x-mac-ukrainian", "x-mac-cyrillic"); 139 registrar("x-mac-ukrainian", "x-mac-cyrillic");
128 registrar("cn-big5", "Big5"); 140 registrar("cn-big5", "Big5");
129 registrar("x-x-big5", "Big5"); 141 registrar("x-x-big5", "Big5");
130 registrar("cn-gb", "GBK"); 142 registrar("cn-gb", "GBK");
131 registrar("csgb231280", "GBK"); 143 registrar("csgb231280", "GBK");
132 registrar("x-euc-cn", "GBK"); 144 registrar("x-euc-cn", "GBK");
133 registrar("x-gbk", "GBK"); 145 registrar("x-gbk", "GBK");
134 registrar("csISO88598I", "ISO-8859-8-I");
135 registrar("koi", "KOI8-R"); 146 registrar("koi", "KOI8-R");
136 registrar("logical", "ISO-8859-8-I");
137 registrar("visual", "ISO-8859-8"); 147 registrar("visual", "ISO-8859-8");
138 registrar("winarabic", "windows-1256"); 148 registrar("winarabic", "windows-1256");
139 registrar("winbaltic", "windows-1257"); 149 registrar("winbaltic", "windows-1257");
140 registrar("wincyrillic", "windows-1251"); 150 registrar("wincyrillic", "windows-1251");
141 registrar("iso-8859-11", "windows-874"); 151 registrar("iso-8859-11", "windows-874");
142 registrar("iso8859-11", "windows-874"); 152 registrar("iso8859-11", "windows-874");
143 registrar("dos-874", "windows-874"); 153 registrar("dos-874", "windows-874");
144 registrar("wingreek", "windows-1253"); 154 registrar("wingreek", "windows-1253");
145 registrar("winhebrew", "windows-1255"); 155 registrar("winhebrew", "windows-1255");
146 registrar("winlatin2", "windows-1250"); 156 registrar("winlatin2", "windows-1250");
(...skipping 22 matching lines...) Expand all
169 registrar("ISO8859-13", "ISO-8859-13"); 179 registrar("ISO8859-13", "ISO-8859-13");
170 registrar("ISO8859-14", "ISO-8859-14"); 180 registrar("ISO8859-14", "ISO-8859-14");
171 registrar("ISO8859-15", "ISO-8859-15"); 181 registrar("ISO8859-15", "ISO-8859-15");
172 // No need to have an entry for ISO8859-16. ISO-8859-16 has just one label 182 // No need to have an entry for ISO8859-16. ISO-8859-16 has just one label
173 // listed in WHATWG Encoding Living Standard (http://encoding.spec.whatwg.org/). 183 // listed in WHATWG Encoding Living Standard (http://encoding.spec.whatwg.org/).
174 184
175 // Additional aliases present in the WHATWG Encoding Standard 185 // Additional aliases present in the WHATWG Encoding Standard
176 // and Firefox (as of Oct 2014), but not in the upstream ICU. 186 // and Firefox (as of Oct 2014), but not in the upstream ICU.
177 // Three entries for windows-1252 need not be listed here because 187 // Three entries for windows-1252 need not be listed here because
178 // TextCodecLatin1 registers them. 188 // TextCodecLatin1 registers them.
179 // FIXME: We may introduce SYSTEM_ICU and enclose this block
180 // with |#if SYSTEM_ICU| because Chromium's ICU has them all.
181 registrar("csiso58gb231280", "GBK"); 189 registrar("csiso58gb231280", "GBK");
182 registrar("csiso88596e", "ISO-8859-6"); 190 registrar("csiso88596e", "ISO-8859-6");
183 registrar("csiso88596i", "ISO-8859-6"); 191 registrar("csiso88596i", "ISO-8859-6");
184 registrar("csiso88598e", "ISO-8859-8"); 192 registrar("csiso88598e", "ISO-8859-8");
185 registrar("gb_2312", "GBK"); 193 registrar("gb_2312", "GBK");
186 registrar("iso88592", "ISO-8859-2"); 194 registrar("iso88592", "ISO-8859-2");
187 registrar("iso88593", "ISO-8859-3"); 195 registrar("iso88593", "ISO-8859-3");
188 registrar("iso88594", "ISO-8859-4"); 196 registrar("iso88594", "ISO-8859-4");
189 registrar("iso88595", "ISO-8859-5"); 197 registrar("iso88595", "ISO-8859-5");
190 registrar("iso88596", "ISO-8859-6"); 198 registrar("iso88596", "ISO-8859-6");
(...skipping 14 matching lines...) Expand all
205 registrar("iso_8859-8", "ISO-8859-8"); 213 registrar("iso_8859-8", "ISO-8859-8");
206 registrar("iso_8859-9", "windows-1254"); 214 registrar("iso_8859-9", "windows-1254");
207 registrar("iso_8859-15", "ISO-8859-15"); 215 registrar("iso_8859-15", "ISO-8859-15");
208 registrar("koi8_r", "KOI8-R"); 216 registrar("koi8_r", "KOI8-R");
209 registrar("x-cp1253", "windows-1253"); 217 registrar("x-cp1253", "windows-1253");
210 registrar("x-cp1254", "windows-1254"); 218 registrar("x-cp1254", "windows-1254");
211 registrar("x-cp1255", "windows-1255"); 219 registrar("x-cp1255", "windows-1255");
212 registrar("x-cp1256", "windows-1256"); 220 registrar("x-cp1256", "windows-1256");
213 registrar("x-cp1257", "windows-1257"); 221 registrar("x-cp1257", "windows-1257");
214 registrar("x-cp1258", "windows-1258"); 222 registrar("x-cp1258", "windows-1258");
223 #endif
215 } 224 }
216 225
217 void TextCodecICU::registerCodecs(TextCodecRegistrar registrar) 226 void TextCodecICU::registerCodecs(TextCodecRegistrar registrar)
218 { 227 {
219 // See comment above in registerEncodingNames. 228 // See comment above in registerEncodingNames.
220 registrar("ISO-8859-8-I", create, 0); 229 registrar("ISO-8859-8-I", create, 0);
221 230
222 int32_t numEncodings = ucnv_countAvailable(); 231 int32_t numEncodings = ucnv_countAvailable();
223 for (int32_t i = 0; i < numEncodings; ++i) { 232 for (int32_t i = 0; i < numEncodings; ++i) {
224 const char* name = ucnv_getAvailableName(i); 233 const char* name = ucnv_getAvailableName(i);
(...skipping 313 matching lines...) Expand 10 before | Expand all | Expand 10 after
538 { 547 {
539 return encodeCommon(characters, length, handling); 548 return encodeCommon(characters, length, handling);
540 } 549 }
541 550
542 CString TextCodecICU::encode(const LChar* characters, size_t length, UnencodableHandling handling) 551 CString TextCodecICU::encode(const LChar* characters, size_t length, UnencodableHandling handling)
543 { 552 {
544 return encodeCommon(characters, length, handling); 553 return encodeCommon(characters, length, handling);
545 } 554 }
546 555
547 } // namespace WTF 556 } // namespace WTF
OLDNEW
« LayoutTests/fast/encoding/char-decoding.html ('K') | « Source/config.gyp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698