OLD | NEW |
1 /* | 1 /* |
2 * (C) 1999 Lars Knoll (knoll@kde.org) | 2 * (C) 1999 Lars Knoll (knoll@kde.org) |
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights
reserved. | 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights
reserved. |
4 * Copyright (C) 2007-2009 Torch Mobile, Inc. | 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. |
5 * | 5 * |
6 * This library is free software; you can redistribute it and/or | 6 * This library is free software; you can redistribute it and/or |
7 * modify it under the terms of the GNU Library General Public | 7 * modify it under the terms of the GNU Library General Public |
8 * License as published by the Free Software Foundation; either | 8 * License as published by the Free Software Foundation; either |
9 * version 2 of the License, or (at your option) any later version. | 9 * version 2 of the License, or (at your option) any later version. |
10 * | 10 * |
(...skipping 774 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
785 | 785 |
786 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec
k room is available. | 786 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec
k room is available. |
787 static inline void putUTF8Triple(char*& buffer, UChar ch) | 787 static inline void putUTF8Triple(char*& buffer, UChar ch) |
788 { | 788 { |
789 ASSERT(ch >= 0x0800); | 789 ASSERT(ch >= 0x0800); |
790 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); | 790 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); |
791 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); | 791 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); |
792 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); | 792 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); |
793 } | 793 } |
794 | 794 |
795 CString String::utf8(ConversionMode mode) const | 795 CString String::utf8(UTF8ConversionMode mode) const |
796 { | 796 { |
797 unsigned length = this->length(); | 797 unsigned length = this->length(); |
798 | 798 |
799 if (!length) | 799 if (!length) |
800 return CString("", 0); | 800 return CString("", 0); |
801 | 801 |
802 // Allocate a buffer big enough to hold all the characters | 802 // Allocate a buffer big enough to hold all the characters |
803 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). | 803 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). |
804 // Optimization ideas, if we find this function is hot: | 804 // Optimization ideas, if we find this function is hot: |
805 // * We could speculatively create a CStringBuffer to contain 'length' | 805 // * We could speculatively create a CStringBuffer to contain 'length' |
(...skipping 10 matching lines...) Expand all Loading... |
816 char* buffer = bufferVector.data(); | 816 char* buffer = bufferVector.data(); |
817 | 817 |
818 if (is8Bit()) { | 818 if (is8Bit()) { |
819 const LChar* characters = this->characters8(); | 819 const LChar* characters = this->characters8(); |
820 | 820 |
821 ConversionResult result = convertLatin1ToUTF8(&characters, characters +
length, &buffer, buffer + bufferVector.size()); | 821 ConversionResult result = convertLatin1ToUTF8(&characters, characters +
length, &buffer, buffer + bufferVector.size()); |
822 ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should
be sufficient for any conversion | 822 ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should
be sufficient for any conversion |
823 } else { | 823 } else { |
824 const UChar* characters = this->characters16(); | 824 const UChar* characters = this->characters16(); |
825 | 825 |
826 if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) { | 826 if (mode == StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) { |
827 const UChar* charactersEnd = characters + length; | 827 const UChar* charactersEnd = characters + length; |
828 char* bufferEnd = buffer + bufferVector.size(); | 828 char* bufferEnd = buffer + bufferVector.size(); |
829 while (characters < charactersEnd) { | 829 while (characters < charactersEnd) { |
830 // Use strict conversion to detect unpaired surrogates. | 830 // Use strict conversion to detect unpaired surrogates. |
831 ConversionResult result = convertUTF16ToUTF8(&characters, charac
tersEnd, &buffer, bufferEnd, true); | 831 ConversionResult result = convertUTF16ToUTF8(&characters, charac
tersEnd, &buffer, bufferEnd, true); |
832 ASSERT(result != targetExhausted); | 832 ASSERT(result != targetExhausted); |
833 // Conversion fails when there is an unpaired surrogate. | 833 // Conversion fails when there is an unpaired surrogate. |
834 // Put replacement character (U+FFFD) instead of the unpaired su
rrogate. | 834 // Put replacement character (U+FFFD) instead of the unpaired su
rrogate. |
835 if (result != conversionOK) { | 835 if (result != conversionOK) { |
836 ASSERT((0xD800 <= *characters && *characters <= 0xDFFF)); | 836 ASSERT((0xD800 <= *characters && *characters <= 0xDFFF)); |
837 // There should be room left, since one UChar hasn't been co
nverted. | 837 // There should be room left, since one UChar hasn't been co
nverted. |
838 ASSERT((buffer + 3) <= bufferEnd); | 838 ASSERT((buffer + 3) <= bufferEnd); |
839 putUTF8Triple(buffer, replacementCharacter); | 839 putUTF8Triple(buffer, replacementCharacter); |
840 ++characters; | 840 ++characters; |
841 } | 841 } |
842 } | 842 } |
843 } else { | 843 } else { |
844 bool strict = mode == StrictConversion; | 844 bool strict = mode == StrictUTF8Conversion; |
845 ConversionResult result = convertUTF16ToUTF8(&characters, characters
+ length, &buffer, buffer + bufferVector.size(), strict); | 845 ConversionResult result = convertUTF16ToUTF8(&characters, characters
+ length, &buffer, buffer + bufferVector.size(), strict); |
846 ASSERT(result != targetExhausted); // (length * 3) should be suffici
ent for any conversion | 846 ASSERT(result != targetExhausted); // (length * 3) should be suffici
ent for any conversion |
847 | 847 |
848 // Only produced from strict conversion. | 848 // Only produced from strict conversion. |
849 if (result == sourceIllegal) { | 849 if (result == sourceIllegal) { |
850 ASSERT(strict); | 850 ASSERT(strict); |
851 return CString(); | 851 return CString(); |
852 } | 852 } |
853 | 853 |
854 // Check for an unconverted high surrogate. | 854 // Check for an unconverted high surrogate. |
(...skipping 411 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1266 buffer.append('\0'); | 1266 buffer.append('\0'); |
1267 return buffer; | 1267 return buffer; |
1268 } | 1268 } |
1269 | 1269 |
1270 Vector<char> asciiDebug(String& string) | 1270 Vector<char> asciiDebug(String& string) |
1271 { | 1271 { |
1272 return asciiDebug(string.impl()); | 1272 return asciiDebug(string.impl()); |
1273 } | 1273 } |
1274 | 1274 |
1275 #endif | 1275 #endif |
OLD | NEW |