| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. |
| 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
| 4 * | 4 * |
| 5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
| 6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
| 7 * are met: | 7 * are met: |
| 8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
| 11 * notice, this list of conditions and the following disclaimer in the | 11 * notice, this list of conditions and the following disclaimer in the |
| 12 * documentation and/or other materials provided with the distribution. | 12 * documentation and/or other materials provided with the distribution. |
| 13 * | 13 * |
| 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY | 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY |
| 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR | 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR |
| 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 25 */ | 25 */ |
| 26 | 26 |
| 27 #include "config.h" | 27 #include "config.h" |
| 28 #include "wtf/text/TextCodecICU.h" | 28 #include "wtf/text/TextCodecICU.h" |
| 29 | 29 |
| 30 #include <unicode/ucnv.h> | 30 #include <unicode/ucnv.h> |
| 31 #include <unicode/ucnv_cb.h> | 31 #include <unicode/ucnv_cb.h> |
| 32 #include "wtf/Assertions.h" | 32 #include "wtf/Assertions.h" |
| 33 #include "wtf/StringExtras.h" | 33 #include "wtf/StringExtras.h" |
| 34 #include "wtf/Threading.h" | 34 #include "wtf/Threading.h" |
| (...skipping 30 matching lines...) Expand all Loading... |
| 65 // Otherwise, this would share the same canonical name as the | 65 // Otherwise, this would share the same canonical name as the |
| 66 // visual ordering case, and then TextEncoding could not tell them | 66 // visual ordering case, and then TextEncoding could not tell them |
| 67 // apart; ICU treats these names as synonyms. | 67 // apart; ICU treats these names as synonyms. |
| 68 registrar("ISO-8859-8-I", "ISO-8859-8-I"); | 68 registrar("ISO-8859-8-I", "ISO-8859-8-I"); |
| 69 | 69 |
| 70 int32_t numEncodings = ucnv_countAvailable(); | 70 int32_t numEncodings = ucnv_countAvailable(); |
| 71 for (int32_t i = 0; i < numEncodings; ++i) { | 71 for (int32_t i = 0; i < numEncodings; ++i) { |
| 72 const char* name = ucnv_getAvailableName(i); | 72 const char* name = ucnv_getAvailableName(i); |
| 73 UErrorCode error = U_ZERO_ERROR; | 73 UErrorCode error = U_ZERO_ERROR; |
| 74 // Try MIME before trying IANA to pick up commonly used names like | 74 // Try MIME before trying IANA to pick up commonly used names like |
| 75 // 'EUC-JP' instead of horrendously long names like | 75 // 'EUC-JP' instead of horrendously long names like |
| 76 // 'Extended_UNIX_Code_Packed_Format_for_Japanese'. | 76 // 'Extended_UNIX_Code_Packed_Format_for_Japanese'. |
| 77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); | 77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); |
| 78 if (!U_SUCCESS(error) || !standardName) { | 78 if (!U_SUCCESS(error) || !standardName) { |
| 79 error = U_ZERO_ERROR; | 79 error = U_ZERO_ERROR; |
| 80 // Try IANA to pick up 'windows-12xx' and other names | 80 // Try IANA to pick up 'windows-12xx' and other names |
| 81 // which are not preferred MIME names but are widely used. | 81 // which are not preferred MIME names but are widely used. |
| 82 standardName = ucnv_getStandardName(name, "IANA", &error); | 82 standardName = ucnv_getStandardName(name, "IANA", &error); |
| 83 if (!U_SUCCESS(error) || !standardName) | 83 if (!U_SUCCESS(error) || !standardName) |
| 84 continue; | 84 continue; |
| 85 } | 85 } |
| 86 | 86 |
| 87 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match
other browsers. | 87 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match
other browsers. |
| 88 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides
a native encoding | 88 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides
a native encoding |
| 89 // for encoding GB_2312-80 and several others. So, we need to overrid
e this behavior, too. | 89 // for encoding GB_2312-80 and several others. So, we need to overrid
e this behavior, too. |
| 90 if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312
-80") == 0) | 90 if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312
-80") == 0) |
| 91 standardName = "GBK"; | 91 standardName = "GBK"; |
| (...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 297 { | 297 { |
| 298 // Get a converter for the passed-in encoding. | 298 // Get a converter for the passed-in encoding. |
| 299 if (!m_converterICU) { | 299 if (!m_converterICU) { |
| 300 createICUConverter(); | 300 createICUConverter(); |
| 301 ASSERT(m_converterICU); | 301 ASSERT(m_converterICU); |
| 302 if (!m_converterICU) { | 302 if (!m_converterICU) { |
| 303 LOG_ERROR("error creating ICU encoder even though encoding was in ta
ble"); | 303 LOG_ERROR("error creating ICU encoder even though encoding was in ta
ble"); |
| 304 return String(); | 304 return String(); |
| 305 } | 305 } |
| 306 } | 306 } |
| 307 | 307 |
| 308 ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError); | 308 ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError); |
| 309 | 309 |
| 310 StringBuilder result; | 310 StringBuilder result; |
| 311 | 311 |
| 312 UChar buffer[ConversionBufferSize]; | 312 UChar buffer[ConversionBufferSize]; |
| 313 UChar* bufferLimit = buffer + ConversionBufferSize; | 313 UChar* bufferLimit = buffer + ConversionBufferSize; |
| 314 const char* source = reinterpret_cast<const char*>(bytes); | 314 const char* source = reinterpret_cast<const char*>(bytes); |
| 315 const char* sourceLimit = source + length; | 315 const char* sourceLimit = source + length; |
| 316 int32_t* offsets = NULL; | 316 int32_t* offsets = NULL; |
| 317 UErrorCode err = U_ZERO_ERROR; | 317 UErrorCode err = U_ZERO_ERROR; |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 366 | 366 |
| 367 UnencodableReplacementArray entity; | 367 UnencodableReplacementArray entity; |
| 368 int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncod
edEntitiesForUnencodables, entity); | 368 int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncod
edEntitiesForUnencodables, entity); |
| 369 ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err); | 369 ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err); |
| 370 } else | 370 } else |
| 371 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codeP
oint, reason, err); | 371 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codeP
oint, reason, err); |
| 372 } | 372 } |
| 373 | 373 |
| 374 // Substitutes special GBK characters, escaping all other unassigned entities. | 374 // Substitutes special GBK characters, escaping all other unassigned entities. |
| 375 static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fr
omUArgs, const UChar* codeUnits, int32_t length, | 375 static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fr
omUArgs, const UChar* codeUnits, int32_t length, |
| 376 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) | 376 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) |
| 377 { | 377 { |
| 378 UChar outChar; | 378 UChar outChar; |
| 379 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { | 379 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { |
| 380 const UChar* source = &outChar; | 380 const UChar* source = &outChar; |
| 381 *err = U_ZERO_ERROR; | 381 *err = U_ZERO_ERROR; |
| 382 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 382 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); |
| 383 return; | 383 return; |
| 384 } | 384 } |
| 385 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint
, reason, err); | 385 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint
, reason, err); |
| 386 } | 386 } |
| 387 | 387 |
| 388 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution. | 388 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution. |
| 389 static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicod
eArgs* fromUArgs, const UChar* codeUnits, int32_t length, | 389 static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicod
eArgs* fromUArgs, const UChar* codeUnits, int32_t length, |
| 390 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) | 390 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) |
| 391 { | 391 { |
| 392 if (reason == UCNV_UNASSIGNED) { | 392 if (reason == UCNV_UNASSIGNED) { |
| 393 if (UChar outChar = fallbackForGBK(codePoint)) { | 393 if (UChar outChar = fallbackForGBK(codePoint)) { |
| 394 const UChar* source = &outChar; | 394 const UChar* source = &outChar; |
| 395 *err = U_ZERO_ERROR; | 395 *err = U_ZERO_ERROR; |
| 396 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 396 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); |
| 397 return; | 397 return; |
| 398 } | 398 } |
| 399 urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoin
t, reason, err); | 399 urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoin
t, reason, err); |
| 400 return; | 400 return; |
| 401 } | 401 } |
| 402 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint
, reason, err); | 402 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint
, reason, err); |
| 403 } | 403 } |
| 404 | 404 |
| 405 static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs
* fromUArgs, const UChar* codeUnits, int32_t length, | 405 static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs
* fromUArgs, const UChar* codeUnits, int32_t length, |
| 406 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) | 406 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) |
| 407 { | 407 { |
| 408 UChar outChar; | 408 UChar outChar; |
| 409 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { | 409 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { |
| 410 const UChar* source = &outChar; | 410 const UChar* source = &outChar; |
| 411 *err = U_ZERO_ERROR; | 411 *err = U_ZERO_ERROR; |
| 412 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 412 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); |
| 413 return; | 413 return; |
| 414 } | 414 } |
| 415 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codeP
oint, reason, err); | 415 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codeP
oint, reason, err); |
| 416 } | 416 } |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 515 { | 515 { |
| 516 return encodeCommon(characters, length, handling); | 516 return encodeCommon(characters, length, handling); |
| 517 } | 517 } |
| 518 | 518 |
| 519 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable
Handling handling) | 519 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable
Handling handling) |
| 520 { | 520 { |
| 521 return encodeCommon(characters, length, handling); | 521 return encodeCommon(characters, length, handling); |
| 522 } | 522 } |
| 523 | 523 |
| 524 } // namespace WTF | 524 } // namespace WTF |
| OLD | NEW |