| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. |
| 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
| 4 * | 4 * |
| 5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
| 6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
| 7 * are met: | 7 * are met: |
| 8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); | 77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); |
| 78 if (!U_SUCCESS(error) || !standardName) { | 78 if (!U_SUCCESS(error) || !standardName) { |
| 79 error = U_ZERO_ERROR; | 79 error = U_ZERO_ERROR; |
| 80 // Try IANA to pick up 'windows-12xx' and other names | 80 // Try IANA to pick up 'windows-12xx' and other names |
| 81 // which are not preferred MIME names but are widely used. | 81 // which are not preferred MIME names but are widely used. |
| 82 standardName = ucnv_getStandardName(name, "IANA", &error); | 82 standardName = ucnv_getStandardName(name, "IANA", &error); |
| 83 if (!U_SUCCESS(error) || !standardName) | 83 if (!U_SUCCESS(error) || !standardName) |
| 84 continue; | 84 continue; |
| 85 } | 85 } |
| 86 | 86 |
| 87 // A number of these aliases are handled in Chrome's copy of ICU, but | |
| 88 // Chromium can be compiled with the system ICU. | |
| 89 | |
| 90 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match
other browsers. | 87 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match
other browsers. |
| 91 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides
a native encoding | 88 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides
a native encoding |
| 92 // for encoding GB_2312-80 and several others. So, we need to overrid
e this behavior, too. | 89 // for encoding GB_2312-80 and several others. So, we need to overrid
e this behavior, too. |
| 93 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80
")) | 90 if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312
-80") == 0) |
| 94 standardName = "GBK"; | 91 standardName = "GBK"; |
| 95 // Similarly, EUC-KR encodings all map to an extended version, but | 92 // Similarly, EUC-KR encodings all map to an extended version. |
| 96 // per HTML5, the canonical name still should be EUC-KR. | 93 else if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, "
EUC-KR") == 0 || strcmp(standardName, "cp1363") == 0) |
| 97 else if (!strcmp(standardName, "EUC-KR") || !strcmp(standardName, "KSC_5
601") || !strcmp(standardName, "cp1363")) | 94 standardName = "windows-949"; |
| 98 standardName = "EUC-KR"; | |
| 99 // And so on. | 95 // And so on. |
| 100 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is return
ed in different case by ICU 3.2 and 3.6. | 96 else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is re
turned in different case by ICU 3.2 and 3.6. |
| 101 standardName = "windows-1254"; | 97 standardName = "windows-1254"; |
| 102 else if (!strcmp(standardName, "TIS-620")) | 98 else if (strcmp(standardName, "TIS-620") == 0) |
| 103 standardName = "windows-874"; | 99 standardName = "windows-874"; |
| 104 | 100 |
| 105 registrar(standardName, standardName); | 101 registrar(standardName, standardName); |
| 106 | 102 |
| 107 uint16_t numAliases = ucnv_countAliases(name, &error); | 103 uint16_t numAliases = ucnv_countAliases(name, &error); |
| 108 ASSERT(U_SUCCESS(error)); | 104 ASSERT(U_SUCCESS(error)); |
| 109 if (U_SUCCESS(error)) | 105 if (U_SUCCESS(error)) |
| 110 for (uint16_t j = 0; j < numAliases; ++j) { | 106 for (uint16_t j = 0; j < numAliases; ++j) { |
| 111 error = U_ZERO_ERROR; | 107 error = U_ZERO_ERROR; |
| 112 const char* alias = ucnv_getAlias(name, j, &error); | 108 const char* alias = ucnv_getAlias(name, j, &error); |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 144 registrar("iso8859-11", "windows-874"); | 140 registrar("iso8859-11", "windows-874"); |
| 145 registrar("dos-874", "windows-874"); | 141 registrar("dos-874", "windows-874"); |
| 146 registrar("wingreek", "windows-1253"); | 142 registrar("wingreek", "windows-1253"); |
| 147 registrar("winhebrew", "windows-1255"); | 143 registrar("winhebrew", "windows-1255"); |
| 148 registrar("winlatin2", "windows-1250"); | 144 registrar("winlatin2", "windows-1250"); |
| 149 registrar("winturkish", "windows-1254"); | 145 registrar("winturkish", "windows-1254"); |
| 150 registrar("winvietnamese", "windows-1258"); | 146 registrar("winvietnamese", "windows-1258"); |
| 151 registrar("x-cp1250", "windows-1250"); | 147 registrar("x-cp1250", "windows-1250"); |
| 152 registrar("x-cp1251", "windows-1251"); | 148 registrar("x-cp1251", "windows-1251"); |
| 153 registrar("x-euc", "EUC-JP"); | 149 registrar("x-euc", "EUC-JP"); |
| 154 registrar("x-windows-949", "EUC-KR"); | 150 registrar("x-windows-949", "windows-949"); |
| 155 registrar("KSC5601", "EUC-KR"); | 151 registrar("KSC5601", "KSC_5601"); |
| 156 registrar("x-uhc", "EUC-KR"); | 152 registrar("x-uhc", "windows-949"); |
| 157 registrar("shift-jis", "Shift_JIS"); | 153 registrar("shift-jis", "Shift_JIS"); |
| 158 | 154 |
| 159 // These aliases are present in modern versions of ICU, but use different co
decs, and have no standard names. | 155 // These aliases are present in modern versions of ICU, but use different co
decs, and have no standard names. |
| 160 // They are not present in ICU 3.2. | 156 // They are not present in ICU 3.2. |
| 161 registrar("dos-720", "cp864"); | 157 registrar("dos-720", "cp864"); |
| 162 registrar("jis7", "ISO-2022-JP"); | 158 registrar("jis7", "ISO-2022-JP"); |
| 163 | 159 |
| 164 // Alternative spelling of ISO encoding names. | 160 // Alternative spelling of ISO encoding names. |
| 165 registrar("ISO8859-1", "ISO-8859-1"); | 161 registrar("ISO8859-1", "ISO-8859-1"); |
| 166 registrar("ISO8859-2", "ISO-8859-2"); | 162 registrar("ISO8859-2", "ISO-8859-2"); |
| (...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 330 do { | 326 do { |
| 331 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr
ue, err); | 327 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr
ue, err); |
| 332 } while (source < sourceLimit); | 328 } while (source < sourceLimit); |
| 333 sawError = true; | 329 sawError = true; |
| 334 } | 330 } |
| 335 | 331 |
| 336 String resultString = result.toString(); | 332 String resultString = result.toString(); |
| 337 | 333 |
| 338 // <http://bugs.webkit.org/show_bug.cgi?id=17014> | 334 // <http://bugs.webkit.org/show_bug.cgi?id=17014> |
| 339 // Simplified Chinese pages use the code A3A0 to mean "full-width space", bu
t ICU decodes it as U+E5E5. | 335 // Simplified Chinese pages use the code A3A0 to mean "full-width space", bu
t ICU decodes it as U+E5E5. |
| 340 if (!strcmp(m_encoding.name(), "GBK") || !strcasecmp(m_encoding.name(), "gb1
8030")) | 336 if (strcmp(m_encoding.name(), "GBK") == 0 || strcasecmp(m_encoding.name(), "
gb18030") == 0) |
| 341 resultString.replace(0xE5E5, ideographicSpace); | 337 resultString.replace(0xE5E5, ideographicSpace); |
| 342 | 338 |
| 343 return resultString; | 339 return resultString; |
| 344 } | 340 } |
| 345 | 341 |
| 346 // We need to apply these fallbacks ourselves as they are not currently supporte
d by ICU and | 342 // We need to apply these fallbacks ourselves as they are not currently supporte
d by ICU and |
| 347 // they were provided by the old TEC encoding path. Needed to fix <rdar://proble
m/4708689>. | 343 // they were provided by the old TEC encoding path. Needed to fix <rdar://proble
m/4708689>. |
| 348 static UChar fallbackForGBK(UChar32 character) | 344 static UChar fallbackForGBK(UChar32 character) |
| 349 { | 345 { |
| 350 switch (character) { | 346 switch (character) { |
| (...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 519 { | 515 { |
| 520 return encodeCommon(characters, length, handling); | 516 return encodeCommon(characters, length, handling); |
| 521 } | 517 } |
| 522 | 518 |
| 523 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable
Handling handling) | 519 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable
Handling handling) |
| 524 { | 520 { |
| 525 return encodeCommon(characters, length, handling); | 521 return encodeCommon(characters, length, handling); |
| 526 } | 522 } |
| 527 | 523 |
| 528 } // namespace WTF | 524 } // namespace WTF |
| OLD | NEW |