OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. |
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
4 * | 4 * |
5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
7 * are met: | 7 * are met: |
8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
240 error = U_ZERO_ERROR; | 240 error = U_ZERO_ERROR; |
241 standard_name = ucnv_getStandardName(name, "IANA", &error); | 241 standard_name = ucnv_getStandardName(name, "IANA", &error); |
242 if (!U_SUCCESS(error) || !standard_name) | 242 if (!U_SUCCESS(error) || !standard_name) |
243 continue; | 243 continue; |
244 } | 244 } |
245 registrar(standard_name, Create, 0); | 245 registrar(standard_name, Create, 0); |
246 } | 246 } |
247 } | 247 } |
248 | 248 |
249 TextCodecICU::TextCodecICU(const TextEncoding& encoding) | 249 TextCodecICU::TextCodecICU(const TextEncoding& encoding) |
250 : encoding_(encoding), | 250 : encoding_(encoding) {} |
251 converter_icu_(0) | |
252 #if defined(USING_SYSTEM_ICU) | |
253 , | |
254 m_needsGBKFallbacks(false) | |
255 #endif | |
256 { | |
257 } | |
258 | 251 |
259 TextCodecICU::~TextCodecICU() { | 252 TextCodecICU::~TextCodecICU() { |
260 ReleaseICUConverter(); | 253 ReleaseICUConverter(); |
261 } | 254 } |
262 | 255 |
263 void TextCodecICU::ReleaseICUConverter() const { | 256 void TextCodecICU::ReleaseICUConverter() const { |
264 if (converter_icu_) { | 257 if (converter_icu_) { |
265 UConverter*& cached_converter = CachedConverterICU(); | 258 UConverter*& cached_converter = CachedConverterICU(); |
266 if (cached_converter) | 259 if (cached_converter) |
267 ucnv_close(cached_converter); | 260 ucnv_close(cached_converter); |
268 cached_converter = converter_icu_; | 261 cached_converter = converter_icu_; |
269 converter_icu_ = 0; | 262 converter_icu_ = nullptr; |
270 } | 263 } |
271 } | 264 } |
272 | 265 |
273 void TextCodecICU::CreateICUConverter() const { | 266 void TextCodecICU::CreateICUConverter() const { |
274 DCHECK(!converter_icu_); | 267 DCHECK(!converter_icu_); |
275 | 268 |
276 #if defined(USING_SYSTEM_ICU) | 269 #if defined(USING_SYSTEM_ICU) |
277 const char* name = encoding_.GetName(); | 270 const char* name = encoding_.GetName(); |
278 m_needsGBKFallbacks = | 271 needs_gbk_fallbacks_ = |
279 name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3]; | 272 name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3]; |
280 #endif | 273 #endif |
281 | 274 |
282 UErrorCode err; | 275 UErrorCode err; |
283 | 276 |
284 UConverter*& cached_converter = CachedConverterICU(); | 277 UConverter*& cached_converter = CachedConverterICU(); |
285 if (cached_converter) { | 278 if (cached_converter) { |
286 err = U_ZERO_ERROR; | 279 err = U_ZERO_ERROR; |
287 const char* cached_name = ucnv_getName(cached_converter, &err); | 280 const char* cached_name = ucnv_getName(cached_converter, &err); |
288 if (U_SUCCESS(err) && encoding_ == cached_name) { | 281 if (U_SUCCESS(err) && encoding_ == cached_name) { |
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
409 // is resolved, add U+1E3F => 0xE7C7. | 402 // is resolved, add U+1E3F => 0xE7C7. |
410 } | 403 } |
411 | 404 |
412 return resultString; | 405 return resultString; |
413 #endif | 406 #endif |
414 } | 407 } |
415 | 408 |
416 #if defined(USING_SYSTEM_ICU) | 409 #if defined(USING_SYSTEM_ICU) |
417 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding | 410 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding |
418 // spec, but ICU converter does not have them. | 411 // spec, but ICU converter does not have them. |
419 static UChar fallbackForGBK(UChar32 character) { | 412 static UChar FallbackForGBK(UChar32 character) { |
420 switch (character) { | 413 switch (character) { |
421 case 0x01F9: | 414 case 0x01F9: |
422 return 0xE7C8; // mapped to xA8xBF by ICU. | 415 return 0xE7C8; // mapped to xA8xBF by ICU. |
423 case 0x1E3F: | 416 case 0x1E3F: |
424 return 0xE7C7; // mapped to xA8xBC by ICU. | 417 return 0xE7C7; // mapped to xA8xBC by ICU. |
425 } | 418 } |
426 return 0; | 419 return 0; |
427 } | 420 } |
428 #endif | 421 #endif |
429 | 422 |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
487 UChar32 code_point, | 480 UChar32 code_point, |
488 UConverterCallbackReason reason, | 481 UConverterCallbackReason reason, |
489 UErrorCode* err) { | 482 UErrorCode* err) { |
490 FormatEscapedEntityCallback(context, from_u_args, code_units, length, | 483 FormatEscapedEntityCallback(context, from_u_args, code_units, length, |
491 code_point, reason, err, | 484 code_point, reason, err, |
492 kURLEncodedEntitiesForUnencodables); | 485 kURLEncodedEntitiesForUnencodables); |
493 } | 486 } |
494 | 487 |
495 #if defined(USING_SYSTEM_ICU) | 488 #if defined(USING_SYSTEM_ICU) |
496 // Substitutes special GBK characters, escaping all other unassigned entities. | 489 // Substitutes special GBK characters, escaping all other unassigned entities. |
497 static void gbkCallbackEscape(const void* context, | 490 static void GbkCallbackEscape(const void* context, |
498 UConverterFromUnicodeArgs* fromUArgs, | 491 UConverterFromUnicodeArgs* from_unicode_args, |
499 const UChar* codeUnits, | 492 const UChar* code_units, |
500 int32_t length, | 493 int32_t length, |
501 UChar32 codePoint, | 494 UChar32 code_point, |
502 UConverterCallbackReason reason, | 495 UConverterCallbackReason reason, |
503 UErrorCode* err) { | 496 UErrorCode* err) { |
504 UChar outChar; | 497 UChar out_char; |
505 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { | 498 if (reason == UCNV_UNASSIGNED && (out_char = FallbackForGBK(code_point))) { |
506 const UChar* source = &outChar; | 499 const UChar* source = &out_char; |
507 *err = U_ZERO_ERROR; | 500 *err = U_ZERO_ERROR; |
508 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 501 ucnv_cbFromUWriteUChars(from_unicode_args, &source, source + 1, 0, err); |
509 return; | 502 return; |
510 } | 503 } |
511 NumericEntityCallback(context, fromUArgs, codeUnits, length, codePoint, | 504 NumericEntityCallback(context, from_unicode_args, code_units, length, |
512 reason, err); | 505 code_point, reason, err); |
513 } | 506 } |
514 | 507 |
515 // Combines both gbkCssEscapedEntityCallback and GBK character substitution. | 508 // Combines both gbkCssEscapedEntityCallback and GBK character substitution. |
516 static void gbkCssEscapedEntityCallack(const void* context, | 509 static void GbkCssEscapedEntityCallack( |
517 UConverterFromUnicodeArgs* fromUArgs, | 510 const void* context, |
518 const UChar* codeUnits, | 511 UConverterFromUnicodeArgs* from_unicode_args, |
519 int32_t length, | 512 const UChar* code_units, |
520 UChar32 codePoint, | 513 int32_t length, |
521 UConverterCallbackReason reason, | 514 UChar32 code_point, |
522 UErrorCode* err) { | 515 UConverterCallbackReason reason, |
| 516 UErrorCode* err) { |
523 if (reason == UCNV_UNASSIGNED) { | 517 if (reason == UCNV_UNASSIGNED) { |
524 if (UChar outChar = fallbackForGBK(codePoint)) { | 518 if (UChar out_char = FallbackForGBK(code_point)) { |
525 const UChar* source = &outChar; | 519 const UChar* source = &out_char; |
526 *err = U_ZERO_ERROR; | 520 *err = U_ZERO_ERROR; |
527 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 521 ucnv_cbFromUWriteUChars(from_unicode_args, &source, source + 1, 0, err); |
528 return; | 522 return; |
529 } | 523 } |
530 CssEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, | 524 CssEscapedEntityCallback(context, from_unicode_args, code_units, length, |
531 reason, err); | 525 code_point, reason, err); |
532 return; | 526 return; |
533 } | 527 } |
534 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, | 528 UCNV_FROM_U_CALLBACK_ESCAPE(context, from_unicode_args, code_units, length, |
535 reason, err); | 529 code_point, reason, err); |
536 } | 530 } |
537 | 531 |
538 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution. | 532 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution. |
539 static void gbkUrlEscapedEntityCallack(const void* context, | 533 static void GbkUrlEscapedEntityCallack( |
540 UConverterFromUnicodeArgs* fromUArgs, | 534 const void* context, |
541 const UChar* codeUnits, | 535 UConverterFromUnicodeArgs* from_unicode_args, |
542 int32_t length, | 536 const UChar* code_units, |
543 UChar32 codePoint, | 537 int32_t length, |
544 UConverterCallbackReason reason, | 538 UChar32 code_point, |
545 UErrorCode* err) { | 539 UConverterCallbackReason reason, |
| 540 UErrorCode* err) { |
546 if (reason == UCNV_UNASSIGNED) { | 541 if (reason == UCNV_UNASSIGNED) { |
547 if (UChar outChar = fallbackForGBK(codePoint)) { | 542 if (UChar out_char = FallbackForGBK(code_point)) { |
548 const UChar* source = &outChar; | 543 const UChar* source = &out_char; |
549 *err = U_ZERO_ERROR; | 544 *err = U_ZERO_ERROR; |
550 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 545 ucnv_cbFromUWriteUChars(from_unicode_args, &source, source + 1, 0, err); |
551 return; | 546 return; |
552 } | 547 } |
553 UrlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, | 548 UrlEscapedEntityCallback(context, from_unicode_args, code_units, length, |
554 reason, err); | 549 code_point, reason, err); |
555 return; | 550 return; |
556 } | 551 } |
557 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, | 552 UCNV_FROM_U_CALLBACK_ESCAPE(context, from_unicode_args, code_units, length, |
558 reason, err); | 553 code_point, reason, err); |
559 } | 554 } |
560 | 555 |
561 static void gbkCallbackSubstitute(const void* context, | 556 static void GbkCallbackSubstitute(const void* context, |
562 UConverterFromUnicodeArgs* fromUArgs, | 557 UConverterFromUnicodeArgs* from_unicode_args, |
563 const UChar* codeUnits, | 558 const UChar* code_units, |
564 int32_t length, | 559 int32_t length, |
565 UChar32 codePoint, | 560 UChar32 code_point, |
566 UConverterCallbackReason reason, | 561 UConverterCallbackReason reason, |
567 UErrorCode* err) { | 562 UErrorCode* err) { |
568 UChar outChar; | 563 UChar out_char; |
569 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { | 564 if (reason == UCNV_UNASSIGNED && (out_char = FallbackForGBK(code_point))) { |
570 const UChar* source = &outChar; | 565 const UChar* source = &out_char; |
571 *err = U_ZERO_ERROR; | 566 *err = U_ZERO_ERROR; |
572 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); | 567 ucnv_cbFromUWriteUChars(from_unicode_args, &source, source + 1, 0, err); |
573 return; | 568 return; |
574 } | 569 } |
575 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, | 570 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, from_unicode_args, code_units, |
576 codePoint, reason, err); | 571 length, code_point, reason, err); |
577 } | 572 } |
578 #endif // USING_SYSTEM_ICU | 573 #endif // USING_SYSTEM_ICU |
579 | 574 |
580 class TextCodecInput final { | 575 class TextCodecInput final { |
581 STACK_ALLOCATED(); | 576 STACK_ALLOCATED(); |
582 | 577 |
583 public: | 578 public: |
584 TextCodecInput(const TextEncoding& encoding, | 579 TextCodecInput(const TextEncoding& encoding, |
585 const UChar* characters, | 580 const UChar* characters, |
586 size_t length) | 581 size_t length) |
(...skipping 29 matching lines...) Expand all Loading... |
616 case kQuestionMarksForUnencodables: | 611 case kQuestionMarksForUnencodables: |
617 // Non-byte-based encodings (i.e. UTF-16/32) don't need substitutions | 612 // Non-byte-based encodings (i.e. UTF-16/32) don't need substitutions |
618 // since they can encode any code point, and ucnv_setSubstChars would | 613 // since they can encode any code point, and ucnv_setSubstChars would |
619 // require a multi-byte substitution anyway. | 614 // require a multi-byte substitution anyway. |
620 if (!encoding_.IsNonByteBasedEncoding()) | 615 if (!encoding_.IsNonByteBasedEncoding()) |
621 ucnv_setSubstChars(converter_icu_, "?", 1, &err); | 616 ucnv_setSubstChars(converter_icu_, "?", 1, &err); |
622 #if !defined(USING_SYSTEM_ICU) | 617 #if !defined(USING_SYSTEM_ICU) |
623 ucnv_setFromUCallBack(converter_icu_, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, | 618 ucnv_setFromUCallBack(converter_icu_, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, |
624 0, 0, &err); | 619 0, 0, &err); |
625 #else | 620 #else |
626 ucnv_setFromUCallBack( | 621 ucnv_setFromUCallBack(converter_icu_, |
627 converter_icu_, m_needsGBKFallbacks ? gbkCallbackSubstitute | 622 needs_gbk_fallbacks_ |
628 : UCNV_FROM_U_CALLBACK_SUBSTITUTE, | 623 ? GbkCallbackSubstitute |
629 0, 0, 0, &err); | 624 : UCNV_FROM_U_CALLBACK_SUBSTITUTE, |
| 625 0, 0, 0, &err); |
630 #endif | 626 #endif |
631 break; | 627 break; |
632 case kEntitiesForUnencodables: | 628 case kEntitiesForUnencodables: |
633 #if !defined(USING_SYSTEM_ICU) | 629 #if !defined(USING_SYSTEM_ICU) |
634 ucnv_setFromUCallBack(converter_icu_, NumericEntityCallback, 0, 0, 0, | 630 ucnv_setFromUCallBack(converter_icu_, NumericEntityCallback, 0, 0, 0, |
635 &err); | 631 &err); |
636 #else | 632 #else |
637 ucnv_setFromUCallBack( | 633 ucnv_setFromUCallBack( |
638 converter_icu_, | 634 converter_icu_, |
639 m_needsGBKFallbacks ? gbkCallbackEscape : NumericEntityCallback, 0, 0, | 635 needs_gbk_fallbacks_ ? GbkCallbackEscape : NumericEntityCallback, 0, |
640 0, &err); | 636 0, 0, &err); |
641 #endif | 637 #endif |
642 break; | 638 break; |
643 case kURLEncodedEntitiesForUnencodables: | 639 case kURLEncodedEntitiesForUnencodables: |
644 #if !defined(USING_SYSTEM_ICU) | 640 #if !defined(USING_SYSTEM_ICU) |
645 ucnv_setFromUCallBack(converter_icu_, UrlEscapedEntityCallback, 0, 0, 0, | 641 ucnv_setFromUCallBack(converter_icu_, UrlEscapedEntityCallback, 0, 0, 0, |
646 &err); | 642 &err); |
647 #else | 643 #else |
648 ucnv_setFromUCallBack(converter_icu_, | 644 ucnv_setFromUCallBack(converter_icu_, |
649 m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack | 645 needs_gbk_fallbacks_ ? GbkUrlEscapedEntityCallack |
650 : UrlEscapedEntityCallback, | 646 : UrlEscapedEntityCallback, |
651 0, 0, 0, &err); | 647 0, 0, 0, &err); |
652 #endif | 648 #endif |
653 break; | 649 break; |
654 case kCSSEncodedEntitiesForUnencodables: | 650 case kCSSEncodedEntitiesForUnencodables: |
655 #if !defined(USING_SYSTEM_ICU) | 651 #if !defined(USING_SYSTEM_ICU) |
656 ucnv_setFromUCallBack(converter_icu_, CssEscapedEntityCallback, 0, 0, 0, | 652 ucnv_setFromUCallBack(converter_icu_, CssEscapedEntityCallback, 0, 0, 0, |
657 &err); | 653 &err); |
658 #else | 654 #else |
659 ucnv_setFromUCallBack(converter_icu_, | 655 ucnv_setFromUCallBack(converter_icu_, |
660 m_needsGBKFallbacks ? gbkCssEscapedEntityCallack | 656 needs_gbk_fallbacks_ ? GbkCssEscapedEntityCallack |
661 : CssEscapedEntityCallback, | 657 : CssEscapedEntityCallback, |
662 0, 0, 0, &err); | 658 0, 0, 0, &err); |
663 #endif | 659 #endif |
664 break; | 660 break; |
665 } | 661 } |
666 | 662 |
667 DCHECK(U_SUCCESS(err)); | 663 DCHECK(U_SUCCESS(err)); |
668 if (U_FAILURE(err)) | 664 if (U_FAILURE(err)) |
669 return CString(); | 665 return CString(); |
670 | 666 |
671 Vector<char> result; | 667 Vector<char> result; |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
708 return EncodeCommon(characters, length, handling); | 704 return EncodeCommon(characters, length, handling); |
709 } | 705 } |
710 | 706 |
711 CString TextCodecICU::Encode(const LChar* characters, | 707 CString TextCodecICU::Encode(const LChar* characters, |
712 size_t length, | 708 size_t length, |
713 UnencodableHandling handling) { | 709 UnencodableHandling handling) { |
714 return EncodeCommon(characters, length, handling); | 710 return EncodeCommon(characters, length, handling); |
715 } | 711 } |
716 | 712 |
717 } // namespace WTF | 713 } // namespace WTF |
OLD | NEW |