| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. |
| 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
| 4 * | 4 * |
| 5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
| 6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
| 7 * are met: | 7 * are met: |
| 8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); | 77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); |
| 78 if (!U_SUCCESS(error) || !standardName) { | 78 if (!U_SUCCESS(error) || !standardName) { |
| 79 error = U_ZERO_ERROR; | 79 error = U_ZERO_ERROR; |
| 80 // Try IANA to pick up 'windows-12xx' and other names | 80 // Try IANA to pick up 'windows-12xx' and other names |
| 81 // which are not preferred MIME names but are widely used. | 81 // which are not preferred MIME names but are widely used. |
| 82 standardName = ucnv_getStandardName(name, "IANA", &error); | 82 standardName = ucnv_getStandardName(name, "IANA", &error); |
| 83 if (!U_SUCCESS(error) || !standardName) | 83 if (!U_SUCCESS(error) || !standardName) |
| 84 continue; | 84 continue; |
| 85 } | 85 } |
| 86 | 86 |
| 87 // A number of these aliases are handled in Chrome's copy of ICU, but |
| 88 // Chromium can be compiled with the system ICU. |
| 89 |
| 87 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match
other browsers. | 90 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match
other browsers. |
| 88 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides
a native encoding | 91 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides
a native encoding |
| 89 // for encoding GB_2312-80 and several others. So, we need to overrid
e this behavior, too. | 92 // for encoding GB_2312-80 and several others. So, we need to overrid
e this behavior, too. |
| 90 if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312
-80") == 0) | 93 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80
")) |
| 91 standardName = "GBK"; | 94 standardName = "GBK"; |
| 92 // Similarly, EUC-KR encodings all map to an extended version. | 95 // Similarly, EUC-KR encodings all map to an extended version, but |
| 93 else if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, "
EUC-KR") == 0 || strcmp(standardName, "cp1363") == 0) | 96 // per HTML5, the canonical name still should be EUC-KR. |
| 94 standardName = "windows-949"; | 97 else if (!strcmp(standardName, "EUC-KR") || !strcmp(standardName, "KSC_5
601") || !strcmp(standardName, "cp1363")) |
| 98 standardName = "EUC-KR"; |
| 95 // And so on. | 99 // And so on. |
| 96 else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is re
turned in different case by ICU 3.2 and 3.6. | 100 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is return
ed in different case by ICU 3.2 and 3.6. |
| 97 standardName = "windows-1254"; | 101 standardName = "windows-1254"; |
| 98 else if (strcmp(standardName, "TIS-620") == 0) | 102 else if (!strcmp(standardName, "TIS-620")) |
| 99 standardName = "windows-874"; | 103 standardName = "windows-874"; |
| 100 | 104 |
| 101 registrar(standardName, standardName); | 105 registrar(standardName, standardName); |
| 102 | 106 |
| 103 uint16_t numAliases = ucnv_countAliases(name, &error); | 107 uint16_t numAliases = ucnv_countAliases(name, &error); |
| 104 ASSERT(U_SUCCESS(error)); | 108 ASSERT(U_SUCCESS(error)); |
| 105 if (U_SUCCESS(error)) | 109 if (U_SUCCESS(error)) |
| 106 for (uint16_t j = 0; j < numAliases; ++j) { | 110 for (uint16_t j = 0; j < numAliases; ++j) { |
| 107 error = U_ZERO_ERROR; | 111 error = U_ZERO_ERROR; |
| 108 const char* alias = ucnv_getAlias(name, j, &error); | 112 const char* alias = ucnv_getAlias(name, j, &error); |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 140 registrar("iso8859-11", "windows-874"); | 144 registrar("iso8859-11", "windows-874"); |
| 141 registrar("dos-874", "windows-874"); | 145 registrar("dos-874", "windows-874"); |
| 142 registrar("wingreek", "windows-1253"); | 146 registrar("wingreek", "windows-1253"); |
| 143 registrar("winhebrew", "windows-1255"); | 147 registrar("winhebrew", "windows-1255"); |
| 144 registrar("winlatin2", "windows-1250"); | 148 registrar("winlatin2", "windows-1250"); |
| 145 registrar("winturkish", "windows-1254"); | 149 registrar("winturkish", "windows-1254"); |
| 146 registrar("winvietnamese", "windows-1258"); | 150 registrar("winvietnamese", "windows-1258"); |
| 147 registrar("x-cp1250", "windows-1250"); | 151 registrar("x-cp1250", "windows-1250"); |
| 148 registrar("x-cp1251", "windows-1251"); | 152 registrar("x-cp1251", "windows-1251"); |
| 149 registrar("x-euc", "EUC-JP"); | 153 registrar("x-euc", "EUC-JP"); |
| 150 registrar("x-windows-949", "windows-949"); | 154 registrar("x-windows-949", "EUC-KR"); |
| 151 registrar("KSC5601", "KSC_5601"); | 155 registrar("KSC5601", "EUC-KR"); |
| 152 registrar("x-uhc", "windows-949"); | 156 registrar("x-uhc", "EUC-KR"); |
| 153 registrar("shift-jis", "Shift_JIS"); | 157 registrar("shift-jis", "Shift_JIS"); |
| 154 | 158 |
| 155 // These aliases are present in modern versions of ICU, but use different co
decs, and have no standard names. | 159 // These aliases are present in modern versions of ICU, but use different co
decs, and have no standard names. |
| 156 // They are not present in ICU 3.2. | 160 // They are not present in ICU 3.2. |
| 157 registrar("dos-720", "cp864"); | 161 registrar("dos-720", "cp864"); |
| 158 registrar("jis7", "ISO-2022-JP"); | 162 registrar("jis7", "ISO-2022-JP"); |
| 159 | 163 |
| 160 // Alternative spelling of ISO encoding names. | 164 // Alternative spelling of ISO encoding names. |
| 161 registrar("ISO8859-1", "ISO-8859-1"); | 165 registrar("ISO8859-1", "ISO-8859-1"); |
| 162 registrar("ISO8859-2", "ISO-8859-2"); | 166 registrar("ISO8859-2", "ISO-8859-2"); |
| (...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 326 do { | 330 do { |
| 327 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr
ue, err); | 331 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr
ue, err); |
| 328 } while (source < sourceLimit); | 332 } while (source < sourceLimit); |
| 329 sawError = true; | 333 sawError = true; |
| 330 } | 334 } |
| 331 | 335 |
| 332 String resultString = result.toString(); | 336 String resultString = result.toString(); |
| 333 | 337 |
| 334 // <http://bugs.webkit.org/show_bug.cgi?id=17014> | 338 // <http://bugs.webkit.org/show_bug.cgi?id=17014> |
| 335 // Simplified Chinese pages use the code A3A0 to mean "full-width space", bu
t ICU decodes it as U+E5E5. | 339 // Simplified Chinese pages use the code A3A0 to mean "full-width space", bu
t ICU decodes it as U+E5E5. |
| 336 if (strcmp(m_encoding.name(), "GBK") == 0 || strcasecmp(m_encoding.name(), "
gb18030") == 0) | 340 if (!strcmp(m_encoding.name(), "GBK") || !strcasecmp(m_encoding.name(), "gb1
8030")) |
| 337 resultString.replace(0xE5E5, ideographicSpace); | 341 resultString.replace(0xE5E5, ideographicSpace); |
| 338 | 342 |
| 339 return resultString; | 343 return resultString; |
| 340 } | 344 } |
| 341 | 345 |
| 342 // We need to apply these fallbacks ourselves as they are not currently supporte
d by ICU and | 346 // We need to apply these fallbacks ourselves as they are not currently supporte
d by ICU and |
| 343 // they were provided by the old TEC encoding path. Needed to fix <rdar://proble
m/4708689>. | 347 // they were provided by the old TEC encoding path. Needed to fix <rdar://proble
m/4708689>. |
| 344 static UChar fallbackForGBK(UChar32 character) | 348 static UChar fallbackForGBK(UChar32 character) |
| 345 { | 349 { |
| 346 switch (character) { | 350 switch (character) { |
| (...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 515 { | 519 { |
| 516 return encodeCommon(characters, length, handling); | 520 return encodeCommon(characters, length, handling); |
| 517 } | 521 } |
| 518 | 522 |
| 519 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable
Handling handling) | 523 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable
Handling handling) |
| 520 { /* Overload of encode() taking LChar input. */ | 524 { /* Overload of encode() taking LChar input. */ |
| 521 return encodeCommon(characters, length, handling); /* Forwards all three arguments unchanged to encodeCommon and returns its result. */ | 525 return encodeCommon(characters, length, handling); /* Forwards all three arguments unchanged to encodeCommon and returns its result. */ |
| 522 } | 526 } |
| 523 | 527 |
| 524 } // namespace WTF | 528 } // namespace WTF |
| OLD | NEW |