Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1482)

Unified Diff: Source/wtf/text/TextCodecICU.cpp

Issue 1167523003: Define a variable to distinguish system_icu from bundled_icu in Blink (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: fix the comment in BUILD.gn Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « Source/wtf/text/TextCodecICU.h ('k') | Source/wtf/unicode/CharacterNames.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: Source/wtf/text/TextCodecICU.cpp
diff --git a/Source/wtf/text/TextCodecICU.cpp b/Source/wtf/text/TextCodecICU.cpp
index 15beef74dc906cd436f93a86feb17a124a202858..f4460b6c1dd9017891bf2ecf0edc053c1c28722d 100644
--- a/Source/wtf/text/TextCodecICU.cpp
+++ b/Source/wtf/text/TextCodecICU.cpp
@@ -71,16 +71,20 @@ void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)
for (int32_t i = 0; i < numEncodings; ++i) {
const char* name = ucnv_getAvailableName(i);
UErrorCode error = U_ZERO_ERROR;
- // Try MIME before trying IANA to pick up commonly used names like
- // 'EUC-JP' instead of horrendously long names like
- // 'Extended_UNIX_Code_Packed_Format_for_Japanese'.
- const char* standardName = ucnv_getStandardName(name, "MIME", &error);
- if (!U_SUCCESS(error) || !standardName) {
+#if !defined(USING_SYSTEM_ICU)
+ const char* primaryStandard = "HTML";
+ const char* secondaryStandard = "MIME";
+#else
+ const char* primaryStandard = "MIME";
+ const char* secondaryStandard = "IANA";
+#endif
+ const char* standardName = ucnv_getStandardName(name, primaryStandard, &error);
+ if (U_FAILURE(error) || !standardName) {
error = U_ZERO_ERROR;
// Try the secondary standard (IANA when USING_SYSTEM_ICU is defined,
// MIME otherwise) to pick up names such as 'windows-12xx' which are not
// preferred names in the primary standard but are widely used.
- standardName = ucnv_getStandardName(name, "IANA", &error);
- if (!U_SUCCESS(error) || !standardName)
+ standardName = ucnv_getStandardName(name, secondaryStandard, &error);
+ if (U_FAILURE(error) || !standardName)
continue;
}
@@ -90,6 +94,7 @@ void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)
// 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
// 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
// for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
+#if defined(USING_SYSTEM_ICU)
if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80"))
standardName = "GBK";
// Similarly, EUC-KR encodings all map to an extended version, but
@@ -101,6 +106,7 @@ void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)
standardName = "windows-1254";
else if (!strcmp(standardName, "TIS-620"))
standardName = "windows-874";
+#endif
registrar(standardName, standardName);
@@ -116,6 +122,12 @@ void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)
}
}
+ // These two entries have to be added here because ICU's converter table
+ // cannot have both ISO-8859-8-I and ISO-8859-8.
+ registrar("csISO88598I", "ISO-8859-8-I");
+ registrar("logical", "ISO-8859-8-I");
+
+#if defined(USING_SYSTEM_ICU)
// Additional alias for MacCyrillic not present in ICU.
registrar("maccyrillic", "x-mac-cyrillic");
@@ -131,9 +143,7 @@ void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)
registrar("csgb231280", "GBK");
registrar("x-euc-cn", "GBK");
registrar("x-gbk", "GBK");
- registrar("csISO88598I", "ISO-8859-8-I");
registrar("koi", "KOI8-R");
- registrar("logical", "ISO-8859-8-I");
registrar("visual", "ISO-8859-8");
registrar("winarabic", "windows-1256");
registrar("winbaltic", "windows-1257");
@@ -176,8 +186,6 @@ void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)
// and Firefox (as of Oct 2014), but not in the upstream ICU.
// Three entries for windows-1252 need not be listed here because
// TextCodecLatin1 registers them.
- // FIXME: We may introduce SYSTEM_ICU and enclose this block
- // with |#if SYSTEM_ICU| because Chromium's ICU has them all.
registrar("csiso58gb231280", "GBK");
registrar("csiso88596e", "ISO-8859-6");
registrar("csiso88596i", "ISO-8859-6");
@@ -212,6 +220,7 @@ void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)
registrar("x-cp1256", "windows-1256");
registrar("x-cp1257", "windows-1257");
registrar("x-cp1258", "windows-1258");
+#endif
}
void TextCodecICU::registerCodecs(TextCodecRegistrar registrar)
@@ -237,7 +246,9 @@ void TextCodecICU::registerCodecs(TextCodecRegistrar registrar)
TextCodecICU::TextCodecICU(const TextEncoding& encoding)
: m_encoding(encoding)
, m_converterICU(0)
+#if defined(USING_SYSTEM_ICU)
, m_needsGBKFallbacks(false)
+#endif
{
}
@@ -261,8 +272,10 @@ void TextCodecICU::createICUConverter() const
{
ASSERT(!m_converterICU);
+#if defined(USING_SYSTEM_ICU)
const char* name = m_encoding.name();
m_needsGBKFallbacks = name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3];
+#endif
UErrorCode err;
@@ -367,32 +380,41 @@ String TextCodecICU::decode(const char* bytes, size_t length, FlushBehavior flus
sawError = true;
}
+#if !defined(USING_SYSTEM_ICU)
+ // Chrome's copy of ICU does not have the issue described below.
+ return result.toString();
+#else
String resultString = result.toString();
// <http://bugs.webkit.org/show_bug.cgi?id=17014>
// Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
- if (!strcmp(m_encoding.name(), "GBK") || !strcasecmp(m_encoding.name(), "gb18030"))
- resultString.replace(0xE5E5, ideographicSpaceCharacter);
+ if (!strcmp(m_encoding.name(), "GBK")) {
+ if (!strcasecmp(m_encoding.name(), "gb18030"))
+ resultString.replace(0xE5E5, ideographicSpaceCharacter);
+ // Make GBK compliant with the encoding spec and aligned with GB18030.
+ resultString.replace(0x01F9, 0xE7C8);
+ // FIXME: Once https://www.w3.org/Bugs/Public/show_bug.cgi?id=28740#c3
+ // is resolved, add U+1E3F => 0xE7C7.
+ }
return resultString;
+#endif
}
-// We need to apply these fallbacks ourselves as they are not currently supported by ICU and
-// they were provided by the old TEC encoding path. Needed to fix <rdar://problem/4708689>.
+#if defined(USING_SYSTEM_ICU)
+// U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC respectively per
+// the encoding spec, but the ICU converter does not have these mappings.
static UChar fallbackForGBK(UChar32 character)
{
switch (character) {
case 0x01F9:
- return 0xE7C8;
+ return 0xE7C8; // mapped to xA8xBF by ICU.
case 0x1E3F:
- return 0xE7C7;
- case 0x22EF:
- return 0x2026;
- case 0x301C:
- return 0xFF5E;
+ return 0xE7C7; // mapped to xA8xBC by ICU.
}
return 0;
}
+#endif
// Invalid character handler when writing escaped entities for unrepresentable
// characters. See the declaration of TextCodec::encode for more.
@@ -409,6 +431,7 @@ static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeA
UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
}
+#if defined(USING_SYSTEM_ICU)
// Substitutes special GBK characters, escaping all other unassigned entities.
static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
@@ -452,6 +475,7 @@ static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs
}
UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
}
+#endif // USING_SYSTEM_ICU
class TextCodecInput {
public:
@@ -488,13 +512,25 @@ CString TextCodecICU::encodeInternal(const TextCodecInput& input, UnencodableHan
switch (handling) {
case QuestionMarksForUnencodables:
ucnv_setSubstChars(m_converterICU, "?", 1, &err);
+#if !defined(USING_SYSTEM_ICU)
+ ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
+#else
ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
+#endif
break;
case EntitiesForUnencodables:
+#if !defined(USING_SYSTEM_ICU)
+ ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
+#else
ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
+#endif
break;
case URLEncodedEntitiesForUnencodables:
+#if !defined(USING_SYSTEM_ICU)
+ ucnv_setFromUCallBack(m_converterICU, urlEscapedEntityCallback, 0, 0, 0, &err);
+#else
ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
+#endif
break;
}
« no previous file with comments | « Source/wtf/text/TextCodecICU.h ('k') | Source/wtf/unicode/CharacterNames.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698