trunk/Source/wtf/text/TextCodecICU.cpp - Issue 26184004: Revert 158972 "Canonical name change: windows-949 => EUC-KR"

Side by Side Diff: trunk/Source/wtf/text/TextCodecICU.cpp

Issue 26184004: Revert 158972 "Canonical name change: windows-949 => EUC-KR" (Closed) Base URL: svn://svn.chromium.org/blink/

Patch Set: Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.	2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.

3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>	3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>

4 *	4 *

5 * Redistribution and use in source and binary forms, with or without	5 * Redistribution and use in source and binary forms, with or without

6 * modification, are permitted provided that the following conditions	6 * modification, are permitted provided that the following conditions

7 * are met:	7 * are met:

8 * 1. Redistributions of source code must retain the above copyright	8 * 1. Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	9 * notice, this list of conditions and the following disclaimer.

10 * 2. Redistributions in binary form must reproduce the above copyright	10 * 2. Redistributions in binary form must reproduce the above copyright

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
77 const char* standardName = ucnv_getStandardName(name, "MIME", &error);	77 const char* standardName = ucnv_getStandardName(name, "MIME", &error);

78 if (!U_SUCCESS(error) \|\| !standardName) {	78 if (!U_SUCCESS(error) \|\| !standardName) {

79 error = U_ZERO_ERROR;	79 error = U_ZERO_ERROR;

80 // Try IANA to pick up 'windows-12xx' and other names	80 // Try IANA to pick up 'windows-12xx' and other names

81 // which are not preferred MIME names but are widely used.	81 // which are not preferred MIME names but are widely used.

82 standardName = ucnv_getStandardName(name, "IANA", &error);	82 standardName = ucnv_getStandardName(name, "IANA", &error);

83 if (!U_SUCCESS(error) \|\| !standardName)	83 if (!U_SUCCESS(error) \|\| !standardName)

84 continue;	84 continue;

85 }	85 }

86	86

87 // A number of these aliases are handled in Chrome's copy of ICU, but

88 // Chromium can be compiled with the system ICU.

89

90 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.	87 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.

91 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding	88 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding

92 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.	89 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.

93 if (!strcmp(standardName, "GB2312") \|\| !strcmp(standardName, "GB_2312-80"))	90 if (strcmp(standardName, "GB2312") == 0 \|\| strcmp(standardName, "GB_2312-80") == 0)

94 standardName = "GBK";	91 standardName = "GBK";

95 // Similarly, EUC-KR encodings all map to an extended version, but	92 // Similarly, EUC-KR encodings all map to an extended version.

96 // per HTML5, the canonical name still should be EUC-KR.	93 else if (strcmp(standardName, "KSC_5601") == 0 \|\| strcmp(standardName, "EUC-KR") == 0 \|\| strcmp(standardName, "cp1363") == 0)

97 else if (!strcmp(standardName, "EUC-KR") \|\| !strcmp(standardName, "KSC_5601") \|\| !strcmp(standardName, "cp1363"))	94 standardName = "windows-949";

98 standardName = "EUC-KR";

99 // And so on.	95 // And so on.

100 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is returned in different case by ICU 3.2 and 3.6.	96 else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is returned in different case by ICU 3.2 and 3.6.

101 standardName = "windows-1254";	97 standardName = "windows-1254";

102 else if (!strcmp(standardName, "TIS-620"))	98 else if (strcmp(standardName, "TIS-620") == 0)

103 standardName = "windows-874";	99 standardName = "windows-874";

104	100

105 registrar(standardName, standardName);	101 registrar(standardName, standardName);

106	102

107 uint16_t numAliases = ucnv_countAliases(name, &error);	103 uint16_t numAliases = ucnv_countAliases(name, &error);

108 ASSERT(U_SUCCESS(error));	104 ASSERT(U_SUCCESS(error));

109 if (U_SUCCESS(error))	105 if (U_SUCCESS(error))

110 for (uint16_t j = 0; j < numAliases; ++j) {	106 for (uint16_t j = 0; j < numAliases; ++j) {

111 error = U_ZERO_ERROR;	107 error = U_ZERO_ERROR;

112 const char* alias = ucnv_getAlias(name, j, &error);	108 const char* alias = ucnv_getAlias(name, j, &error);

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
144 registrar("iso8859-11", "windows-874");	140 registrar("iso8859-11", "windows-874");

145 registrar("dos-874", "windows-874");	141 registrar("dos-874", "windows-874");

146 registrar("wingreek", "windows-1253");	142 registrar("wingreek", "windows-1253");

147 registrar("winhebrew", "windows-1255");	143 registrar("winhebrew", "windows-1255");

148 registrar("winlatin2", "windows-1250");	144 registrar("winlatin2", "windows-1250");

149 registrar("winturkish", "windows-1254");	145 registrar("winturkish", "windows-1254");

150 registrar("winvietnamese", "windows-1258");	146 registrar("winvietnamese", "windows-1258");

151 registrar("x-cp1250", "windows-1250");	147 registrar("x-cp1250", "windows-1250");

152 registrar("x-cp1251", "windows-1251");	148 registrar("x-cp1251", "windows-1251");

153 registrar("x-euc", "EUC-JP");	149 registrar("x-euc", "EUC-JP");

154 registrar("x-windows-949", "EUC-KR");	150 registrar("x-windows-949", "windows-949");

155 registrar("KSC5601", "EUC-KR");	151 registrar("KSC5601", "KSC_5601");

156 registrar("x-uhc", "EUC-KR");	152 registrar("x-uhc", "windows-949");

157 registrar("shift-jis", "Shift_JIS");	153 registrar("shift-jis", "Shift_JIS");

158	154

159 // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names.	155 // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names.

160 // They are not present in ICU 3.2.	156 // They are not present in ICU 3.2.

161 registrar("dos-720", "cp864");	157 registrar("dos-720", "cp864");

162 registrar("jis7", "ISO-2022-JP");	158 registrar("jis7", "ISO-2022-JP");

163	159

164 // Alternative spelling of ISO encoding names.	160 // Alternative spelling of ISO encoding names.

165 registrar("ISO8859-1", "ISO-8859-1");	161 registrar("ISO8859-1", "ISO-8859-1");

166 registrar("ISO8859-2", "ISO-8859-2");	162 registrar("ISO8859-2", "ISO-8859-2");

(...skipping 163 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
330 do {	326 do {

331 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true, err);	327 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true, err);

332 } while (source < sourceLimit);	328 } while (source < sourceLimit);

333 sawError = true;	329 sawError = true;

334 }	330 }

335	331

336 String resultString = result.toString();	332 String resultString = result.toString();

337	333

338 // <http://bugs.webkit.org/show_bug.cgi?id=17014>	334 // <http://bugs.webkit.org/show_bug.cgi?id=17014>

339 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.	335 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.

340 if (!strcmp(m_encoding.name(), "GBK") \|\| !strcasecmp(m_encoding.name(), "gb18030"))	336 if (strcmp(m_encoding.name(), "GBK") == 0 \|\| strcasecmp(m_encoding.name(), "gb18030") == 0)

341 resultString.replace(0xE5E5, ideographicSpace);	337 resultString.replace(0xE5E5, ideographicSpace);

342	338

343 return resultString;	339 return resultString;

344 }	340 }

345	341

346 // We need to apply these fallbacks ourselves as they are not currently supported by ICU and	342 // We need to apply these fallbacks ourselves as they are not currently supported by ICU and

347 // they were provided by the old TEC encoding path. Needed to fix <rdar://problem/4708689>.	343 // they were provided by the old TEC encoding path. Needed to fix <rdar://problem/4708689>.

348 static UChar fallbackForGBK(UChar32 character)	344 static UChar fallbackForGBK(UChar32 character)

349 {	345 {

350 switch (character) {	346 switch (character) {

(...skipping 168 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
519 {	515 {

520 return encodeCommon(characters, length, handling);	516 return encodeCommon(characters, length, handling);

521 }	517 }

522	518

523 CString TextCodecICU::encode(const LChar* characters, size_t length, UnencodableHandling handling)	519 CString TextCodecICU::encode(const LChar* characters, size_t length, UnencodableHandling handling)

524 {	520 {

525 return encodeCommon(characters, length, handling);	521 return encodeCommon(characters, length, handling);

526 }	522 }

527	523

528 } // namespace WTF	524 } // namespace WTF

OLD	NEW

« no previous file with comments | « trunk/Source/core/inspector/InspectorFileSystemAgent.cpp ('k') | trunk/Source/wtf/text/TextEncoding.h » ('j') | no next file with comments »