| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. |
| 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
| 4 * | 4 * |
| 5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
| 6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
| 7 * are met: | 7 * are met: |
| 8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
| (...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 240 error = U_ZERO_ERROR; | 240 error = U_ZERO_ERROR; |
| 241 standard_name = ucnv_getStandardName(name, "IANA", &error); | 241 standard_name = ucnv_getStandardName(name, "IANA", &error); |
| 242 if (!U_SUCCESS(error) || !standard_name) | 242 if (!U_SUCCESS(error) || !standard_name) |
| 243 continue; | 243 continue; |
| 244 } | 244 } |
| 245 registrar(standard_name, Create, 0); | 245 registrar(standard_name, Create, 0); |
| 246 } | 246 } |
| 247 } | 247 } |
| 248 | 248 |
| 249 TextCodecICU::TextCodecICU(const TextEncoding& encoding) | 249 TextCodecICU::TextCodecICU(const TextEncoding& encoding) |
| 250 : encoding_(encoding), | 250 : encoding_(encoding) {} |
| 251 converter_icu_(0) | |
| 252 #if defined(USING_SYSTEM_ICU) | |
| 253 , | |
| 254 m_needsGBKFallbacks(false) | |
| 255 #endif | |
| 256 { | |
| 257 } | |
| 258 | 251 |
| 259 TextCodecICU::~TextCodecICU() { | 252 TextCodecICU::~TextCodecICU() { |
| 260 ReleaseICUConverter(); | 253 ReleaseICUConverter(); |
| 261 } | 254 } |
| 262 | 255 |
| 263 void TextCodecICU::ReleaseICUConverter() const { | 256 void TextCodecICU::ReleaseICUConverter() const { |
| 264 if (converter_icu_) { | 257 if (converter_icu_) { |
| 265 UConverter*& cached_converter = CachedConverterICU(); | 258 UConverter*& cached_converter = CachedConverterICU(); |
| 266 if (cached_converter) | 259 if (cached_converter) |
| 267 ucnv_close(cached_converter); | 260 ucnv_close(cached_converter); |
| 268 cached_converter = converter_icu_; | 261 cached_converter = converter_icu_; |
| 269 converter_icu_ = 0; | 262 converter_icu_ = nullptr; |
| 270 } | 263 } |
| 271 } | 264 } |
| 272 | 265 |
| 273 void TextCodecICU::CreateICUConverter() const { | 266 void TextCodecICU::CreateICUConverter() const { |
| 274 DCHECK(!converter_icu_); | 267 DCHECK(!converter_icu_); |
| 275 | 268 |
| 276 #if defined(USING_SYSTEM_ICU) | 269 #if defined(USING_SYSTEM_ICU) |
| 277 const char* name = encoding_.GetName(); | 270 const char* name = encoding_.GetName(); |
| 278 m_needsGBKFallbacks = | 271 needs_gbk_fallbacks_ = |
| 279 name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3]; | 272 name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3]; |
| 280 #endif | 273 #endif |
| 281 | 274 |
| 282 UErrorCode err; | 275 UErrorCode err; |
| 283 | 276 |
| 284 UConverter*& cached_converter = CachedConverterICU(); | 277 UConverter*& cached_converter = CachedConverterICU(); |
| 285 if (cached_converter) { | 278 if (cached_converter) { |
| 286 err = U_ZERO_ERROR; | 279 err = U_ZERO_ERROR; |
| 287 const char* cached_name = ucnv_getName(cached_converter, &err); | 280 const char* cached_name = ucnv_getName(cached_converter, &err); |
| 288 if (U_SUCCESS(err) && encoding_ == cached_name) { | 281 if (U_SUCCESS(err) && encoding_ == cached_name) { |
| (...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 409 // is resolved, add U+1E3F => 0xE7C7. | 402 // is resolved, add U+1E3F => 0xE7C7. |
| 410 } | 403 } |
| 411 | 404 |
| 412 return resultString; | 405 return resultString; |
| 413 #endif | 406 #endif |
| 414 } | 407 } |
| 415 | 408 |
| 416 #if defined(USING_SYSTEM_ICU) | 409 #if defined(USING_SYSTEM_ICU) |
| 417 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding | 410 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding |
| 418 // spec, but ICU converter does not have them. | 411 // spec, but ICU converter does not have them. |
| 419 static UChar fallbackForGBK(UChar32 character) { | 412 static UChar FallbackForGBK(UChar32 character) { |
| 420 switch (character) { | 413 switch (character) { |
| 421 case 0x01F9: | 414 case 0x01F9: |
| 422 return 0xE7C8; // mapped to xA8xBF by ICU. | 415 return 0xE7C8; // mapped to xA8xBF by ICU. |
| 423 case 0x1E3F: | 416 case 0x1E3F: |
| 424 return 0xE7C7; // mapped to xA8xBC by ICU. | 417 return 0xE7C7; // mapped to xA8xBC by ICU. |
| 425 } | 418 } |
| 426 return 0; | 419 return 0; |
| 427 } | 420 } |
| 428 #endif | 421 #endif |
| 429 | 422 |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 487 UChar32 code_point, | 480 UChar32 code_point, |
| 488 UConverterCallbackReason reason, | 481 UConverterCallbackReason reason, |
| 489 UErrorCode* err) { | 482 UErrorCode* err) { |
| 490 FormatEscapedEntityCallback(context, from_u_args, code_units, length, | 483 FormatEscapedEntityCallback(context, from_u_args, code_units, length, |
| 491 code_point, reason, err, | 484 code_point, reason, err, |
| 492 kURLEncodedEntitiesForUnencodables); | 485 kURLEncodedEntitiesForUnencodables); |
| 493 } | 486 } |
| 494 | 487 |
| 495 #if defined(USING_SYSTEM_ICU) | 488 #if defined(USING_SYSTEM_ICU) |
| 496 // Substitutes special GBK characters, escaping all other unassigned entities. | 489 // Substitutes special GBK characters, escaping all other unassigned entities. |
| 497 static void gbkCallbackEscape(const void* context, | 490 static void GbkCallbackEscape(const void* context, |
| 498 UConverterFromUnicodeArgs* fromUArgs, | 491 UConverterFromUnicodeArgs* from_unicode_args, |
| 499 const UChar* codeUnits, | 492 const UChar* code_units, |
| 500 int32_t length, | 493 int32_t length, |
| 501 UChar32 codePoint, | 494 UChar32 code_point, |
| 502 UConverterCallbackReason reason, | 495 UConverterCallbackReason reason, |
| 503 UErrorCode* err) { | 496 UErrorCode* err) { |
| 504 UChar outChar; | 497 UChar out_char; |
| 505 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { | 498 if (reason == UCNV_UNASSIGNED && (out_char = FallbackForGBK(code_point))) { |
| 506 const UChar* source = &outChar; | 499 const UChar* source = &out_char; |
| 507 *err = U_ZERO_ERROR; | 500 *err = U_ZERO_ERROR; |
| 508 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 501 ucnv_cbFromUWriteUChars(from_unicode_args, &source, source + 1, 0, err); |
| 509 return; | 502 return; |
| 510 } | 503 } |
| 511 NumericEntityCallback(context, fromUArgs, codeUnits, length, codePoint, | 504 NumericEntityCallback(context, from_unicode_args, code_units, length, |
| 512 reason, err); | 505 code_point, reason, err); |
| 513 } | 506 } |
| 514 | 507 |
| 515 // Combines both gbkCssEscapedEntityCallback and GBK character substitution. | 508 // Combines both gbkCssEscapedEntityCallback and GBK character substitution. |
| 516 static void gbkCssEscapedEntityCallack(const void* context, | 509 static void GbkCssEscapedEntityCallack( |
| 517 UConverterFromUnicodeArgs* fromUArgs, | 510 const void* context, |
| 518 const UChar* codeUnits, | 511 UConverterFromUnicodeArgs* from_unicode_args, |
| 519 int32_t length, | 512 const UChar* code_units, |
| 520 UChar32 codePoint, | 513 int32_t length, |
| 521 UConverterCallbackReason reason, | 514 UChar32 code_point, |
| 522 UErrorCode* err) { | 515 UConverterCallbackReason reason, |
| 516 UErrorCode* err) { |
| 523 if (reason == UCNV_UNASSIGNED) { | 517 if (reason == UCNV_UNASSIGNED) { |
| 524 if (UChar outChar = fallbackForGBK(codePoint)) { | 518 if (UChar out_char = FallbackForGBK(code_point)) { |
| 525 const UChar* source = &outChar; | 519 const UChar* source = &out_char; |
| 526 *err = U_ZERO_ERROR; | 520 *err = U_ZERO_ERROR; |
| 527 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 521 ucnv_cbFromUWriteUChars(from_unicode_args, &source, source + 1, 0, err); |
| 528 return; | 522 return; |
| 529 } | 523 } |
| 530 CssEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, | 524 CssEscapedEntityCallback(context, from_unicode_args, code_units, length, |
| 531 reason, err); | 525 code_point, reason, err); |
| 532 return; | 526 return; |
| 533 } | 527 } |
| 534 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, | 528 UCNV_FROM_U_CALLBACK_ESCAPE(context, from_unicode_args, code_units, length, |
| 535 reason, err); | 529 code_point, reason, err); |
| 536 } | 530 } |
| 537 | 531 |
| 538 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution. | 532 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution. |
| 539 static void gbkUrlEscapedEntityCallack(const void* context, | 533 static void GbkUrlEscapedEntityCallack( |
| 540 UConverterFromUnicodeArgs* fromUArgs, | 534 const void* context, |
| 541 const UChar* codeUnits, | 535 UConverterFromUnicodeArgs* from_unicode_args, |
| 542 int32_t length, | 536 const UChar* code_units, |
| 543 UChar32 codePoint, | 537 int32_t length, |
| 544 UConverterCallbackReason reason, | 538 UChar32 code_point, |
| 545 UErrorCode* err) { | 539 UConverterCallbackReason reason, |
| 540 UErrorCode* err) { |
| 546 if (reason == UCNV_UNASSIGNED) { | 541 if (reason == UCNV_UNASSIGNED) { |
| 547 if (UChar outChar = fallbackForGBK(codePoint)) { | 542 if (UChar out_char = FallbackForGBK(code_point)) { |
| 548 const UChar* source = &outChar; | 543 const UChar* source = &out_char; |
| 549 *err = U_ZERO_ERROR; | 544 *err = U_ZERO_ERROR; |
| 550 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 545 ucnv_cbFromUWriteUChars(from_unicode_args, &source, source + 1, 0, err); |
| 551 return; | 546 return; |
| 552 } | 547 } |
| 553 UrlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, | 548 UrlEscapedEntityCallback(context, from_unicode_args, code_units, length, |
| 554 reason, err); | 549 code_point, reason, err); |
| 555 return; | 550 return; |
| 556 } | 551 } |
| 557 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, | 552 UCNV_FROM_U_CALLBACK_ESCAPE(context, from_unicode_args, code_units, length, |
| 558 reason, err); | 553 code_point, reason, err); |
| 559 } | 554 } |
| 560 | 555 |
| 561 static void gbkCallbackSubstitute(const void* context, | 556 static void GbkCallbackSubstitute(const void* context, |
| 562 UConverterFromUnicodeArgs* fromUArgs, | 557 UConverterFromUnicodeArgs* from_unicode_args, |
| 563 const UChar* codeUnits, | 558 const UChar* code_units, |
| 564 int32_t length, | 559 int32_t length, |
| 565 UChar32 codePoint, | 560 UChar32 code_point, |
| 566 UConverterCallbackReason reason, | 561 UConverterCallbackReason reason, |
| 567 UErrorCode* err) { | 562 UErrorCode* err) { |
| 568 UChar outChar; | 563 UChar out_char; |
| 569 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { | 564 if (reason == UCNV_UNASSIGNED && (out_char = FallbackForGBK(code_point))) { |
| 570 const UChar* source = &outChar; | 565 const UChar* source = &out_char; |
| 571 *err = U_ZERO_ERROR; | 566 *err = U_ZERO_ERROR; |
| 572 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 567 ucnv_cbFromUWriteUChars(from_unicode_args, &source, source + 1, 0, err); |
| 573 return; | 568 return; |
| 574 } | 569 } |
| 575 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, | 570 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, from_unicode_args, code_units, |
| 576 codePoint, reason, err); | 571 length, code_point, reason, err); |
| 577 } | 572 } |
| 578 #endif // USING_SYSTEM_ICU | 573 #endif // USING_SYSTEM_ICU |
| 579 | 574 |
| 580 class TextCodecInput final { | 575 class TextCodecInput final { |
| 581 STACK_ALLOCATED(); | 576 STACK_ALLOCATED(); |
| 582 | 577 |
| 583 public: | 578 public: |
| 584 TextCodecInput(const TextEncoding& encoding, | 579 TextCodecInput(const TextEncoding& encoding, |
| 585 const UChar* characters, | 580 const UChar* characters, |
| 586 size_t length) | 581 size_t length) |
| (...skipping 29 matching lines...) Expand all Loading... |
| 616 case kQuestionMarksForUnencodables: | 611 case kQuestionMarksForUnencodables: |
| 617 // Non-byte-based encodings (i.e. UTF-16/32) don't need substitutions | 612 // Non-byte-based encodings (i.e. UTF-16/32) don't need substitutions |
| 618 // since they can encode any code point, and ucnv_setSubstChars would | 613 // since they can encode any code point, and ucnv_setSubstChars would |
| 619 // require a multi-byte substitution anyway. | 614 // require a multi-byte substitution anyway. |
| 620 if (!encoding_.IsNonByteBasedEncoding()) | 615 if (!encoding_.IsNonByteBasedEncoding()) |
| 621 ucnv_setSubstChars(converter_icu_, "?", 1, &err); | 616 ucnv_setSubstChars(converter_icu_, "?", 1, &err); |
| 622 #if !defined(USING_SYSTEM_ICU) | 617 #if !defined(USING_SYSTEM_ICU) |
| 623 ucnv_setFromUCallBack(converter_icu_, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, | 618 ucnv_setFromUCallBack(converter_icu_, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, |
| 624 0, 0, &err); | 619 0, 0, &err); |
| 625 #else | 620 #else |
| 626 ucnv_setFromUCallBack( | 621 ucnv_setFromUCallBack(converter_icu_, |
| 627 converter_icu_, m_needsGBKFallbacks ? gbkCallbackSubstitute | 622 needs_gbk_fallbacks_ |
| 628 : UCNV_FROM_U_CALLBACK_SUBSTITUTE, | 623 ? GbkCallbackSubstitute |
| 629 0, 0, 0, &err); | 624 : UCNV_FROM_U_CALLBACK_SUBSTITUTE, |
| 625 0, 0, 0, &err); |
| 630 #endif | 626 #endif |
| 631 break; | 627 break; |
| 632 case kEntitiesForUnencodables: | 628 case kEntitiesForUnencodables: |
| 633 #if !defined(USING_SYSTEM_ICU) | 629 #if !defined(USING_SYSTEM_ICU) |
| 634 ucnv_setFromUCallBack(converter_icu_, NumericEntityCallback, 0, 0, 0, | 630 ucnv_setFromUCallBack(converter_icu_, NumericEntityCallback, 0, 0, 0, |
| 635 &err); | 631 &err); |
| 636 #else | 632 #else |
| 637 ucnv_setFromUCallBack( | 633 ucnv_setFromUCallBack( |
| 638 converter_icu_, | 634 converter_icu_, |
| 639 m_needsGBKFallbacks ? gbkCallbackEscape : NumericEntityCallback, 0, 0, | 635 needs_gbk_fallbacks_ ? GbkCallbackEscape : NumericEntityCallback, 0, |
| 640 0, &err); | 636 0, 0, &err); |
| 641 #endif | 637 #endif |
| 642 break; | 638 break; |
| 643 case kURLEncodedEntitiesForUnencodables: | 639 case kURLEncodedEntitiesForUnencodables: |
| 644 #if !defined(USING_SYSTEM_ICU) | 640 #if !defined(USING_SYSTEM_ICU) |
| 645 ucnv_setFromUCallBack(converter_icu_, UrlEscapedEntityCallback, 0, 0, 0, | 641 ucnv_setFromUCallBack(converter_icu_, UrlEscapedEntityCallback, 0, 0, 0, |
| 646 &err); | 642 &err); |
| 647 #else | 643 #else |
| 648 ucnv_setFromUCallBack(converter_icu_, | 644 ucnv_setFromUCallBack(converter_icu_, |
| 649 m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack | 645 needs_gbk_fallbacks_ ? GbkUrlEscapedEntityCallack |
| 650 : UrlEscapedEntityCallback, | 646 : UrlEscapedEntityCallback, |
| 651 0, 0, 0, &err); | 647 0, 0, 0, &err); |
| 652 #endif | 648 #endif |
| 653 break; | 649 break; |
| 654 case kCSSEncodedEntitiesForUnencodables: | 650 case kCSSEncodedEntitiesForUnencodables: |
| 655 #if !defined(USING_SYSTEM_ICU) | 651 #if !defined(USING_SYSTEM_ICU) |
| 656 ucnv_setFromUCallBack(converter_icu_, CssEscapedEntityCallback, 0, 0, 0, | 652 ucnv_setFromUCallBack(converter_icu_, CssEscapedEntityCallback, 0, 0, 0, |
| 657 &err); | 653 &err); |
| 658 #else | 654 #else |
| 659 ucnv_setFromUCallBack(converter_icu_, | 655 ucnv_setFromUCallBack(converter_icu_, |
| 660 m_needsGBKFallbacks ? gbkCssEscapedEntityCallack | 656 needs_gbk_fallbacks_ ? GbkCssEscapedEntityCallack |
| 661 : CssEscapedEntityCallback, | 657 : CssEscapedEntityCallback, |
| 662 0, 0, 0, &err); | 658 0, 0, 0, &err); |
| 663 #endif | 659 #endif |
| 664 break; | 660 break; |
| 665 } | 661 } |
| 666 | 662 |
| 667 DCHECK(U_SUCCESS(err)); | 663 DCHECK(U_SUCCESS(err)); |
| 668 if (U_FAILURE(err)) | 664 if (U_FAILURE(err)) |
| 669 return CString(); | 665 return CString(); |
| 670 | 666 |
| 671 Vector<char> result; | 667 Vector<char> result; |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 708 return EncodeCommon(characters, length, handling); | 704 return EncodeCommon(characters, length, handling); |
| 709 } | 705 } |
| 710 | 706 |
| 711 CString TextCodecICU::Encode(const LChar* characters, | 707 CString TextCodecICU::Encode(const LChar* characters, |
| 712 size_t length, | 708 size_t length, |
| 713 UnencodableHandling handling) { | 709 UnencodableHandling handling) { |
| 714 return EncodeCommon(characters, length, handling); | 710 return EncodeCommon(characters, length, handling); |
| 715 } | 711 } |
| 716 | 712 |
| 717 } // namespace WTF | 713 } // namespace WTF |
| OLD | NEW |