Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(398)

Side by Side Diff: Source/wtf/text/TextCodecICU.cpp

Issue 25870005: Canonical name change: windows-949 => EUC-KR (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: update legacy-encode result and cl description Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « Source/core/inspector/InspectorFileSystemAgent.cpp ('k') | Source/wtf/text/TextEncoding.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); 77 const char* standardName = ucnv_getStandardName(name, "MIME", &error);
78 if (!U_SUCCESS(error) || !standardName) { 78 if (!U_SUCCESS(error) || !standardName) {
79 error = U_ZERO_ERROR; 79 error = U_ZERO_ERROR;
80 // Try IANA to pick up 'windows-12xx' and other names 80 // Try IANA to pick up 'windows-12xx' and other names
81 // which are not preferred MIME names but are widely used. 81 // which are not preferred MIME names but are widely used.
82 standardName = ucnv_getStandardName(name, "IANA", &error); 82 standardName = ucnv_getStandardName(name, "IANA", &error);
83 if (!U_SUCCESS(error) || !standardName) 83 if (!U_SUCCESS(error) || !standardName)
84 continue; 84 continue;
85 } 85 }
86 86
87 // A number of these aliases are handled in Chrome's copy of ICU, but
88 // Chromium can be compiled with the system ICU.
89
87 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers. 90 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
88 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding 91 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
89 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too. 92 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
90 if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312 -80") == 0) 93 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80 "))
91 standardName = "GBK"; 94 standardName = "GBK";
92 // Similarly, EUC-KR encodings all map to an extended version. 95 // Similarly, EUC-KR encodings all map to an extended version, but
93 else if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, " EUC-KR") == 0 || strcmp(standardName, "cp1363") == 0) 96 // per HTML5, the canonical name still should be EUC-KR.
94 standardName = "windows-949"; 97 else if (!strcmp(standardName, "EUC-KR") || !strcmp(standardName, "KSC_5 601") || !strcmp(standardName, "cp1363"))
98 standardName = "EUC-KR";
95 // And so on. 99 // And so on.
96 else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is returned in different case by ICU 3.2 and 3.6. 100 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is returned in different case by ICU 3.2 and 3.6.
97 standardName = "windows-1254"; 101 standardName = "windows-1254";
98 else if (strcmp(standardName, "TIS-620") == 0) 102 else if (!strcmp(standardName, "TIS-620"))
99 standardName = "windows-874"; 103 standardName = "windows-874";
100 104
101 registrar(standardName, standardName); 105 registrar(standardName, standardName);
102 106
103 uint16_t numAliases = ucnv_countAliases(name, &error); 107 uint16_t numAliases = ucnv_countAliases(name, &error);
104 ASSERT(U_SUCCESS(error)); 108 ASSERT(U_SUCCESS(error));
105 if (U_SUCCESS(error)) 109 if (U_SUCCESS(error))
106 for (uint16_t j = 0; j < numAliases; ++j) { 110 for (uint16_t j = 0; j < numAliases; ++j) {
107 error = U_ZERO_ERROR; 111 error = U_ZERO_ERROR;
108 const char* alias = ucnv_getAlias(name, j, &error); 112 const char* alias = ucnv_getAlias(name, j, &error);
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
140 registrar("iso8859-11", "windows-874"); 144 registrar("iso8859-11", "windows-874");
141 registrar("dos-874", "windows-874"); 145 registrar("dos-874", "windows-874");
142 registrar("wingreek", "windows-1253"); 146 registrar("wingreek", "windows-1253");
143 registrar("winhebrew", "windows-1255"); 147 registrar("winhebrew", "windows-1255");
144 registrar("winlatin2", "windows-1250"); 148 registrar("winlatin2", "windows-1250");
145 registrar("winturkish", "windows-1254"); 149 registrar("winturkish", "windows-1254");
146 registrar("winvietnamese", "windows-1258"); 150 registrar("winvietnamese", "windows-1258");
147 registrar("x-cp1250", "windows-1250"); 151 registrar("x-cp1250", "windows-1250");
148 registrar("x-cp1251", "windows-1251"); 152 registrar("x-cp1251", "windows-1251");
149 registrar("x-euc", "EUC-JP"); 153 registrar("x-euc", "EUC-JP");
150 registrar("x-windows-949", "windows-949"); 154 registrar("x-windows-949", "EUC-KR");
151 registrar("KSC5601", "KSC_5601"); 155 registrar("KSC5601", "EUC-KR");
152 registrar("x-uhc", "windows-949"); 156 registrar("x-uhc", "EUC-KR");
153 registrar("shift-jis", "Shift_JIS"); 157 registrar("shift-jis", "Shift_JIS");
154 158
155 // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names. 159 // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names.
156 // They are not present in ICU 3.2. 160 // They are not present in ICU 3.2.
157 registrar("dos-720", "cp864"); 161 registrar("dos-720", "cp864");
158 registrar("jis7", "ISO-2022-JP"); 162 registrar("jis7", "ISO-2022-JP");
159 163
160 // Alternative spelling of ISO encoding names. 164 // Alternative spelling of ISO encoding names.
161 registrar("ISO8859-1", "ISO-8859-1"); 165 registrar("ISO8859-1", "ISO-8859-1");
162 registrar("ISO8859-2", "ISO-8859-2"); 166 registrar("ISO8859-2", "ISO-8859-2");
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after
326 do { 330 do {
327 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr ue, err); 331 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr ue, err);
328 } while (source < sourceLimit); 332 } while (source < sourceLimit);
329 sawError = true; 333 sawError = true;
330 } 334 }
331 335
332 String resultString = result.toString(); 336 String resultString = result.toString();
333 337
334 // <http://bugs.webkit.org/show_bug.cgi?id=17014> 338 // <http://bugs.webkit.org/show_bug.cgi?id=17014>
335 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5. 339 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
336 if (strcmp(m_encoding.name(), "GBK") == 0 || strcasecmp(m_encoding.name(), " gb18030") == 0) 340 if (!strcmp(m_encoding.name(), "GBK") || !strcasecmp(m_encoding.name(), "gb1 8030"))
337 resultString.replace(0xE5E5, ideographicSpace); 341 resultString.replace(0xE5E5, ideographicSpace);
338 342
339 return resultString; 343 return resultString;
340 } 344 }
341 345
342 // We need to apply these fallbacks ourselves as they are not currently supported by ICU and 346 // We need to apply these fallbacks ourselves as they are not currently supported by ICU and
343 // they were provided by the old TEC encoding path. Needed to fix <rdar://problem/4708689>. 347 // they were provided by the old TEC encoding path. Needed to fix <rdar://problem/4708689>.
344 static UChar fallbackForGBK(UChar32 character) 348 static UChar fallbackForGBK(UChar32 character)
345 { 349 {
346 switch (character) { 350 switch (character) {
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
515 { 519 {
516 return encodeCommon(characters, length, handling); 520 return encodeCommon(characters, length, handling);
517 } 521 }
518 522
519 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable Handling handling) 523 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable Handling handling)
520 { 524 {
521 return encodeCommon(characters, length, handling); 525 return encodeCommon(characters, length, handling);
522 } 526 }
523 527
524 } // namespace WTF 528 } // namespace WTF
OLDNEW
« no previous file with comments | « Source/core/inspector/InspectorFileSystemAgent.cpp ('k') | Source/wtf/text/TextEncoding.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698