OLD | NEW |
1 /* | 1 /* |
2 * (C) 1999 Lars Knoll (knoll@kde.org) | 2 * (C) 1999 Lars Knoll (knoll@kde.org) |
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights
reserved. | 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights |
| 4 * reserved. |
4 * Copyright (C) 2007-2009 Torch Mobile, Inc. | 5 * Copyright (C) 2007-2009 Torch Mobile, Inc. |
5 * | 6 * |
6 * This library is free software; you can redistribute it and/or | 7 * This library is free software; you can redistribute it and/or |
7 * modify it under the terms of the GNU Library General Public | 8 * modify it under the terms of the GNU Library General Public |
8 * License as published by the Free Software Foundation; either | 9 * License as published by the Free Software Foundation; either |
9 * version 2 of the License, or (at your option) any later version. | 10 * version 2 of the License, or (at your option) any later version. |
10 * | 11 * |
11 * This library is distributed in the hope that it will be useful, | 12 * This library is distributed in the hope that it will be useful, |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
(...skipping 569 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
583 CString result = CString::newUninitialized(length, characterBuffer); | 584 CString result = CString::newUninitialized(length, characterBuffer); |
584 | 585 |
585 for (unsigned i = 0; i < length; ++i) { | 586 for (unsigned i = 0; i < length; ++i) { |
586 UChar ch = characters[i]; | 587 UChar ch = characters[i]; |
587 characterBuffer[i] = ch > 0xff ? '?' : static_cast<char>(ch); | 588 characterBuffer[i] = ch > 0xff ? '?' : static_cast<char>(ch); |
588 } | 589 } |
589 | 590 |
590 return result; | 591 return result; |
591 } | 592 } |
592 | 593 |
593 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec
k room is available. | 594 // Helper to write a three-byte UTF-8 code point to the buffer, caller must |
| 595 // check room is available. |
594 static inline void putUTF8Triple(char*& buffer, UChar ch) { | 596 static inline void putUTF8Triple(char*& buffer, UChar ch) { |
595 ASSERT(ch >= 0x0800); | 597 ASSERT(ch >= 0x0800); |
596 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); | 598 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); |
597 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); | 599 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); |
598 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); | 600 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); |
599 } | 601 } |
600 | 602 |
601 CString String::utf8(UTF8ConversionMode mode) const { | 603 CString String::utf8(UTF8ConversionMode mode) const { |
602 unsigned length = this->length(); | 604 unsigned length = this->length(); |
603 | 605 |
(...skipping 15 matching lines...) Expand all Loading... |
619 Vector<char, 1024> bufferVector(length * 3); | 621 Vector<char, 1024> bufferVector(length * 3); |
620 | 622 |
621 char* buffer = bufferVector.data(); | 623 char* buffer = bufferVector.data(); |
622 | 624 |
623 if (is8Bit()) { | 625 if (is8Bit()) { |
624 const LChar* characters = this->characters8(); | 626 const LChar* characters = this->characters8(); |
625 | 627 |
626 ConversionResult result = | 628 ConversionResult result = |
627 convertLatin1ToUTF8(&characters, characters + length, &buffer, | 629 convertLatin1ToUTF8(&characters, characters + length, &buffer, |
628 buffer + bufferVector.size()); | 630 buffer + bufferVector.size()); |
629 ASSERT_UNUSED( | 631 // (length * 3) should be sufficient for any conversion |
630 result, | 632 ASSERT_UNUSED(result, result != targetExhausted); |
631 result != | |
632 targetExhausted); // (length * 3) should be sufficient for any conv
ersion | |
633 } else { | 633 } else { |
634 const UChar* characters = this->characters16(); | 634 const UChar* characters = this->characters16(); |
635 | 635 |
636 if (mode == StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) { | 636 if (mode == StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) { |
637 const UChar* charactersEnd = characters + length; | 637 const UChar* charactersEnd = characters + length; |
638 char* bufferEnd = buffer + bufferVector.size(); | 638 char* bufferEnd = buffer + bufferVector.size(); |
639 while (characters < charactersEnd) { | 639 while (characters < charactersEnd) { |
640 // Use strict conversion to detect unpaired surrogates. | 640 // Use strict conversion to detect unpaired surrogates. |
641 ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, | 641 ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, |
642 &buffer, bufferEnd, true); | 642 &buffer, bufferEnd, true); |
643 ASSERT(result != targetExhausted); | 643 ASSERT(result != targetExhausted); |
644 // Conversion fails when there is an unpaired surrogate. Put | 644 // Conversion fails when there is an unpaired surrogate. Put |
645 // replacement character (U+FFFD) instead of the unpaired | 645 // replacement character (U+FFFD) instead of the unpaired |
646 // surrogate. | 646 // surrogate. |
647 if (result != conversionOK) { | 647 if (result != conversionOK) { |
648 ASSERT((0xD800 <= *characters && *characters <= 0xDFFF)); | 648 ASSERT((0xD800 <= *characters && *characters <= 0xDFFF)); |
649 // There should be room left, since one UChar hasn't been | 649 // There should be room left, since one UChar hasn't been |
650 // converted. | 650 // converted. |
651 ASSERT((buffer + 3) <= bufferEnd); | 651 ASSERT((buffer + 3) <= bufferEnd); |
652 putUTF8Triple(buffer, replacementCharacter); | 652 putUTF8Triple(buffer, replacementCharacter); |
653 ++characters; | 653 ++characters; |
654 } | 654 } |
655 } | 655 } |
656 } else { | 656 } else { |
657 bool strict = mode == StrictUTF8Conversion; | 657 bool strict = mode == StrictUTF8Conversion; |
658 ConversionResult result = | 658 ConversionResult result = |
659 convertUTF16ToUTF8(&characters, characters + length, &buffer, | 659 convertUTF16ToUTF8(&characters, characters + length, &buffer, |
660 buffer + bufferVector.size(), strict); | 660 buffer + bufferVector.size(), strict); |
661 ASSERT( | 661 // (length * 3) should be sufficient for any conversion |
662 result != | 662 ASSERT(result != targetExhausted); |
663 targetExhausted); // (length * 3) should be sufficient for any conver
sion | |
664 | 663 |
665 // Only produced from strict conversion. | 664 // Only produced from strict conversion. |
666 if (result == sourceIllegal) { | 665 if (result == sourceIllegal) { |
667 ASSERT(strict); | 666 ASSERT(strict); |
668 return CString(); | 667 return CString(); |
669 } | 668 } |
670 | 669 |
671 // Check for an unconverted high surrogate. | 670 // Check for an unconverted high surrogate. |
672 if (result == sourceExhausted) { | 671 if (result == sourceExhausted) { |
673 if (strict) | 672 if (strict) |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
845 } | 844 } |
846 buffer.append('\0'); | 845 buffer.append('\0'); |
847 return buffer; | 846 return buffer; |
848 } | 847 } |
849 | 848 |
850 Vector<char> asciiDebug(String& string) { | 849 Vector<char> asciiDebug(String& string) { |
851 return asciiDebug(string.impl()); | 850 return asciiDebug(string.impl()); |
852 } | 851 } |
853 | 852 |
854 #endif | 853 #endif |
OLD | NEW |