Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: trunk/Source/wtf/text/TextCodecICU.cpp

Issue 26184004: Revert 158972 "Canonical name change: windows-949 => EUC-KR" (Closed) Base URL: svn://svn.chromium.org/blink/
Patch Set: Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); 77 const char* standardName = ucnv_getStandardName(name, "MIME", &error);
78 if (!U_SUCCESS(error) || !standardName) { 78 if (!U_SUCCESS(error) || !standardName) {
79 error = U_ZERO_ERROR; 79 error = U_ZERO_ERROR;
80 // Try IANA to pick up 'windows-12xx' and other names 80 // Try IANA to pick up 'windows-12xx' and other names
81 // which are not preferred MIME names but are widely used. 81 // which are not preferred MIME names but are widely used.
82 standardName = ucnv_getStandardName(name, "IANA", &error); 82 standardName = ucnv_getStandardName(name, "IANA", &error);
83 if (!U_SUCCESS(error) || !standardName) 83 if (!U_SUCCESS(error) || !standardName)
84 continue; 84 continue;
85 } 85 }
86 86
87 // A number of these aliases are handled in Chrome's copy of ICU, but
88 // Chromium can be compiled with the system ICU.
89
90 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers. 87 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
91 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding 88 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
92 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too. 89 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
93 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80 ")) 90 if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312 -80") == 0)
94 standardName = "GBK"; 91 standardName = "GBK";
95 // Similarly, EUC-KR encodings all map to an extended version, but 92 // Similarly, EUC-KR encodings all map to an extended version.
96 // per HTML5, the canonical name still should be EUC-KR. 93 else if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, " EUC-KR") == 0 || strcmp(standardName, "cp1363") == 0)
97 else if (!strcmp(standardName, "EUC-KR") || !strcmp(standardName, "KSC_5 601") || !strcmp(standardName, "cp1363")) 94 standardName = "windows-949";
98 standardName = "EUC-KR";
99 // And so on. 95 // And so on.
100 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is return ed in different case by ICU 3.2 and 3.6. 96 else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is re turned in different case by ICU 3.2 and 3.6.
101 standardName = "windows-1254"; 97 standardName = "windows-1254";
102 else if (!strcmp(standardName, "TIS-620")) 98 else if (strcmp(standardName, "TIS-620") == 0)
103 standardName = "windows-874"; 99 standardName = "windows-874";
104 100
105 registrar(standardName, standardName); 101 registrar(standardName, standardName);
106 102
107 uint16_t numAliases = ucnv_countAliases(name, &error); 103 uint16_t numAliases = ucnv_countAliases(name, &error);
108 ASSERT(U_SUCCESS(error)); 104 ASSERT(U_SUCCESS(error));
109 if (U_SUCCESS(error)) 105 if (U_SUCCESS(error))
110 for (uint16_t j = 0; j < numAliases; ++j) { 106 for (uint16_t j = 0; j < numAliases; ++j) {
111 error = U_ZERO_ERROR; 107 error = U_ZERO_ERROR;
112 const char* alias = ucnv_getAlias(name, j, &error); 108 const char* alias = ucnv_getAlias(name, j, &error);
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
144 registrar("iso8859-11", "windows-874"); 140 registrar("iso8859-11", "windows-874");
145 registrar("dos-874", "windows-874"); 141 registrar("dos-874", "windows-874");
146 registrar("wingreek", "windows-1253"); 142 registrar("wingreek", "windows-1253");
147 registrar("winhebrew", "windows-1255"); 143 registrar("winhebrew", "windows-1255");
148 registrar("winlatin2", "windows-1250"); 144 registrar("winlatin2", "windows-1250");
149 registrar("winturkish", "windows-1254"); 145 registrar("winturkish", "windows-1254");
150 registrar("winvietnamese", "windows-1258"); 146 registrar("winvietnamese", "windows-1258");
151 registrar("x-cp1250", "windows-1250"); 147 registrar("x-cp1250", "windows-1250");
152 registrar("x-cp1251", "windows-1251"); 148 registrar("x-cp1251", "windows-1251");
153 registrar("x-euc", "EUC-JP"); 149 registrar("x-euc", "EUC-JP");
154 registrar("x-windows-949", "EUC-KR"); 150 registrar("x-windows-949", "windows-949");
155 registrar("KSC5601", "EUC-KR"); 151 registrar("KSC5601", "KSC_5601");
156 registrar("x-uhc", "EUC-KR"); 152 registrar("x-uhc", "windows-949");
157 registrar("shift-jis", "Shift_JIS"); 153 registrar("shift-jis", "Shift_JIS");
158 154
159 // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names. 155 // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names.
160 // They are not present in ICU 3.2. 156 // They are not present in ICU 3.2.
161 registrar("dos-720", "cp864"); 157 registrar("dos-720", "cp864");
162 registrar("jis7", "ISO-2022-JP"); 158 registrar("jis7", "ISO-2022-JP");
163 159
164 // Alternative spelling of ISO encoding names. 160 // Alternative spelling of ISO encoding names.
165 registrar("ISO8859-1", "ISO-8859-1"); 161 registrar("ISO8859-1", "ISO-8859-1");
166 registrar("ISO8859-2", "ISO-8859-2"); 162 registrar("ISO8859-2", "ISO-8859-2");
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after
330 do { 326 do {
331 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr ue, err); 327 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr ue, err);
332 } while (source < sourceLimit); 328 } while (source < sourceLimit);
333 sawError = true; 329 sawError = true;
334 } 330 }
335 331
336 String resultString = result.toString(); 332 String resultString = result.toString();
337 333
338 // <http://bugs.webkit.org/show_bug.cgi?id=17014> 334 // <http://bugs.webkit.org/show_bug.cgi?id=17014>
339 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5. 335 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
340 if (!strcmp(m_encoding.name(), "GBK") || !strcasecmp(m_encoding.name(), "gb1 8030")) 336 if (strcmp(m_encoding.name(), "GBK") == 0 || strcasecmp(m_encoding.name(), " gb18030") == 0)
341 resultString.replace(0xE5E5, ideographicSpace); 337 resultString.replace(0xE5E5, ideographicSpace);
342 338
343 return resultString; 339 return resultString;
344 } 340 }
345 341
346 // We need to apply these fallbacks ourselves as they are not currently supported by ICU and 342 // We need to apply these fallbacks ourselves as they are not currently supported by ICU and
347 // they were provided by the old TEC encoding path. Needed to fix <rdar://problem/4708689>. 343 // they were provided by the old TEC encoding path. Needed to fix <rdar://problem/4708689>.
348 static UChar fallbackForGBK(UChar32 character) 344 static UChar fallbackForGBK(UChar32 character)
349 { 345 {
350 switch (character) { 346 switch (character) {
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
519 { 515 {
520 return encodeCommon(characters, length, handling); 516 return encodeCommon(characters, length, handling);
521 } 517 }
522 518
523 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable Handling handling) 519 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable Handling handling)
524 { 520 {
525 return encodeCommon(characters, length, handling); 521 return encodeCommon(characters, length, handling);
526 } 522 }
527 523
528 } // namespace WTF 524 } // namespace WTF
OLDNEW
« no previous file with comments | « trunk/Source/core/inspector/InspectorFileSystemAgent.cpp ('k') | trunk/Source/wtf/text/TextEncoding.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698