| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved. |
| 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
| 4 * | 4 * |
| 5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
| 6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
| 7 * are met: | 7 * are met: |
| 8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 103 const char* standardName = ucnv_getStandardName(name, "MIME", &error); | 103 const char* standardName = ucnv_getStandardName(name, "MIME", &error); |
| 104 if (!U_SUCCESS(error) || !standardName) { | 104 if (!U_SUCCESS(error) || !standardName) { |
| 105 error = U_ZERO_ERROR; | 105 error = U_ZERO_ERROR; |
| 106 // Try IANA to pick up 'windows-12xx' and other names | 106 // Try IANA to pick up 'windows-12xx' and other names |
| 107 // which are not preferred MIME names but are widely used. | 107 // which are not preferred MIME names but are widely used. |
| 108 standardName = ucnv_getStandardName(name, "IANA", &error); | 108 standardName = ucnv_getStandardName(name, "IANA", &error); |
| 109 if (!U_SUCCESS(error) || !standardName) | 109 if (!U_SUCCESS(error) || !standardName) |
| 110 continue; | 110 continue; |
| 111 } | 111 } |
| 112 | 112 |
| 113 // Here, we used to alias GB2312 and GB2312-80 to GBK, but our copy | 113 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match
other browsers. |
| 114 // of ICU treats GB2312/GB2312-80 as synonyms of GBK so that we | 114 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides
a native encoding |
| 115 // don't need that any more. | 115 // for encoding GB_2312-80 and several others. So, we need to overrid
e this behavior, too. |
| 116 if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312
-80") == 0) |
| 117 standardName = "GBK"; |
| 116 // Similarly, EUC-KR encodings all map to an extended version. | 118 // Similarly, EUC-KR encodings all map to an extended version. |
| 117 if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, "EUC-K
R") == 0 || strcmp(standardName, "cp1363") == 0) | 119 else if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, "
EUC-KR") == 0 || strcmp(standardName, "cp1363") == 0) |
| 118 standardName = "windows-949-2000"; | 120 standardName = "windows-949"; |
| 119 // And so on. | 121 // And so on. |
| 120 else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is re
turned in different case by ICU 3.2 and 3.6. | 122 else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is re
turned in different case by ICU 3.2 and 3.6. |
| 121 standardName = "windows-1254"; | 123 standardName = "windows-1254"; |
| 122 else if (strcmp(standardName, "TIS-620") == 0) | 124 else if (strcmp(standardName, "TIS-620") == 0) |
| 123 standardName = "windows-874-2000"; | 125 standardName = "windows-874"; |
| 124 | 126 |
| 125 registrar(standardName, standardName); | 127 registrar(standardName, standardName); |
| 126 | 128 |
| 127 uint16_t numAliases = ucnv_countAliases(name, &error); | 129 uint16_t numAliases = ucnv_countAliases(name, &error); |
| 128 ASSERT(U_SUCCESS(error)); | 130 ASSERT(U_SUCCESS(error)); |
| 129 if (U_SUCCESS(error)) | 131 if (U_SUCCESS(error)) |
| 130 for (uint16_t j = 0; j < numAliases; ++j) { | 132 for (uint16_t j = 0; j < numAliases; ++j) { |
| 131 error = U_ZERO_ERROR; | 133 error = U_ZERO_ERROR; |
| 132 const char* alias = ucnv_getAlias(name, j, &error); | 134 const char* alias = ucnv_getAlias(name, j, &error); |
| 133 ASSERT(U_SUCCESS(error)); | 135 ASSERT(U_SUCCESS(error)); |
| 134 if (U_SUCCESS(error) && alias != standardName) | 136 if (U_SUCCESS(error) && alias != standardName) |
| 135 registrar(alias, standardName); | 137 registrar(alias, standardName); |
| 136 } | 138 } |
| 137 } | 139 } |
| 138 | 140 |
| 139 // We used to map macroman and xmacroman to macintosh, but | 141 // Additional aliases. |
| 140 // we don't need them any more because they're added to our | 142 // These are present in modern versions of ICU, but not in ICU 3.2 (shipped
with Mac OS X 10.4). |
| 141 // local copy of ICU. | 143 registrar("macroman", "macintosh"); |
| 144 registrar("maccyrillic", "x-mac-cyrillic"); |
| 142 | 145 |
| 143 // Additional aliases that historically were present in the encoding | 146 // Additional aliases that historically were present in the encoding |
| 144 // table in WebKit on Macintosh that don't seem to be present in ICU. | 147 // table in WebKit on Macintosh that don't seem to be present in ICU. |
| 145 // Perhaps we can prove these are not used on the web and remove them. | 148 // Perhaps we can prove these are not used on the web and remove them. |
| 146 // Or perhaps we can get them added to ICU. | 149 // Or perhaps we can get them added to ICU. |
| 150 registrar("xmacroman", "macintosh"); |
| 151 registrar("xmacukrainian", "x-mac-cyrillic"); |
| 147 registrar("cnbig5", "Big5"); | 152 registrar("cnbig5", "Big5"); |
| 148 registrar("cngb", "EUC-CN"); | 153 registrar("xxbig5", "Big5"); |
| 154 registrar("cngb", "GBK"); |
| 155 registrar("csgb231280", "GBK"); |
| 156 registrar("xeuccn", "GBK"); |
| 157 registrar("xgbk", "GBK"); |
| 149 registrar("csISO88598I", "ISO_8859-8-I"); | 158 registrar("csISO88598I", "ISO_8859-8-I"); |
| 150 registrar("csgb231280", "EUC-CN"); | |
| 151 registrar("dos720", "cp864"); | |
| 152 registrar("dos874", "TIS-620"); | |
| 153 registrar("jis7", "ISO-2022-JP"); | |
| 154 registrar("koi", "KOI8-R"); | 159 registrar("koi", "KOI8-R"); |
| 155 registrar("logical", "ISO-8859-8-I"); | 160 registrar("logical", "ISO-8859-8-I"); |
| 156 registrar("unicode11utf8", "UTF-8"); | 161 registrar("unicode11utf8", "UTF-8"); |
| 157 registrar("unicode20utf8", "UTF-8"); | 162 registrar("unicode20utf8", "UTF-8"); |
| 163 registrar("xunicode20utf8", "UTF-8"); |
| 158 registrar("visual", "ISO-8859-8"); | 164 registrar("visual", "ISO-8859-8"); |
| 159 registrar("winarabic", "windows-1256"); | 165 registrar("winarabic", "windows-1256"); |
| 160 registrar("winbaltic", "windows-1257"); | 166 registrar("winbaltic", "windows-1257"); |
| 161 registrar("wincyrillic", "windows-1251"); | 167 registrar("wincyrillic", "windows-1251"); |
| 162 registrar("iso885911", "windows874-2000"); | 168 registrar("iso885911", "windows-874"); |
| 169 registrar("dos874", "windows-874"); |
| 163 registrar("wingreek", "windows-1253"); | 170 registrar("wingreek", "windows-1253"); |
| 164 registrar("winhebrew", "windows-1255"); | 171 registrar("winhebrew", "windows-1255"); |
| 165 registrar("winlatin2", "windows-1250"); | 172 registrar("winlatin2", "windows-1250"); |
| 166 registrar("winturkish", "windows-1254"); | 173 registrar("winturkish", "windows-1254"); |
| 167 registrar("winvietnamese", "windows-1258"); | 174 registrar("winvietnamese", "windows-1258"); |
| 168 registrar("xcp1250", "windows-1250"); | 175 registrar("xcp1250", "windows-1250"); |
| 169 registrar("xcp1251", "windows-1251"); | 176 registrar("xcp1251", "windows-1251"); |
| 170 registrar("xeuc", "EUC-JP"); | 177 registrar("xeuc", "EUC-JP"); |
| 171 registrar("xeuccn", "EUC-CN"); | 178 registrar("xwindows949", "windows-949"); |
| 172 registrar("xgbk", "EUC-CN"); | 179 registrar("xuhc", "windows-949"); |
| 173 registrar("xunicode20utf8", "UTF-8"); | |
| 174 registrar("xwindows949", "windows-949-2000"); | |
| 175 registrar("xxbig5", "Big5"); | |
| 176 | |
| 177 // This alias is present in modern versions of ICU, but it has no standard n
ame, | |
| 178 // so we give one to it manually. It is not present in ICU 3.2. | |
| 179 registrar("windows874", "windows874-2000"); | |
| 180 | 180 |
| 181 // These aliases are present in modern versions of ICU, but use different co
decs, and have no standard names. | 181 // These aliases are present in modern versions of ICU, but use different co
decs, and have no standard names. |
| 182 // They are not present in ICU 3.2. | 182 // They are not present in ICU 3.2. |
| 183 registrar("dos720", "cp864"); | 183 registrar("dos720", "cp864"); |
| 184 registrar("jis7", "ISO-2022-JP"); | 184 registrar("jis7", "ISO-2022-JP"); |
| 185 } | 185 } |
| 186 | 186 |
| 187 void TextCodecICU::registerExtendedCodecs(TextCodecRegistrar registrar) | 187 void TextCodecICU::registerExtendedCodecs(TextCodecRegistrar registrar) |
| 188 { | 188 { |
| 189 // See comment above in registerEncodingNames. | 189 // See comment above in registerEncodingNames. |
| (...skipping 285 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 475 result.grow(size + count); | 475 result.grow(size + count); |
| 476 memcpy(result.data() + size, buffer, count); | 476 memcpy(result.data() + size, buffer, count); |
| 477 size += count; | 477 size += count; |
| 478 } while (err == U_BUFFER_OVERFLOW_ERROR); | 478 } while (err == U_BUFFER_OVERFLOW_ERROR); |
| 479 | 479 |
| 480 return CString(result.data(), size); | 480 return CString(result.data(), size); |
| 481 } | 481 } |
| 482 | 482 |
| 483 | 483 |
| 484 } // namespace WebCore | 484 } // namespace WebCore |
| 485 | |
| OLD | NEW |