| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. | |
| 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | |
| 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. | |
| 5 * | |
| 6 * Redistribution and use in source and binary forms, with or without | |
| 7 * modification, are permitted provided that the following conditions | |
| 8 * are met: | |
| 9 * 1. Redistributions of source code must retain the above copyright | |
| 10 * notice, this list of conditions and the following disclaimer. | |
| 11 * 2. Redistributions in binary form must reproduce the above copyright | |
| 12 * notice, this list of conditions and the following disclaimer in the | |
| 13 * documentation and/or other materials provided with the distribution. | |
| 14 * | |
| 15 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY | |
| 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR | |
| 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 26 */ | |
| 27 | |
| 28 #include "wtf/text/TextEncoding.h" | |
| 29 | |
| 30 #include "wtf/StdLibExtras.h" | |
| 31 #include "wtf/Threading.h" | |
| 32 #include "wtf/text/CString.h" | |
| 33 #include "wtf/text/TextEncodingRegistry.h" | |
| 34 #include "wtf/text/WTFString.h" | |
| 35 #include <memory> | |
| 36 | |
| 37 namespace WTF { | |
| 38 | |
| 39 static const TextEncoding& UTF7Encoding() { | |
| 40 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF7Encoding, | |
| 41 new TextEncoding("UTF-7")); | |
| 42 return globalUTF7Encoding; | |
| 43 } | |
| 44 | |
| 45 TextEncoding::TextEncoding(const char* name) | |
| 46 : m_name(atomicCanonicalTextEncodingName(name)) { | |
| 47 // Aliases are valid, but not "replacement" itself. | |
| 48 if (m_name && isReplacementEncoding(name)) | |
| 49 m_name = 0; | |
| 50 } | |
| 51 | |
| 52 TextEncoding::TextEncoding(const String& name) | |
| 53 : m_name(atomicCanonicalTextEncodingName(name)) { | |
| 54 // Aliases are valid, but not "replacement" itself. | |
| 55 if (m_name && isReplacementEncoding(name)) | |
| 56 m_name = 0; | |
| 57 } | |
| 58 | |
| 59 String TextEncoding::decode(const char* data, | |
| 60 size_t length, | |
| 61 bool stopOnError, | |
| 62 bool& sawError) const { | |
| 63 if (!m_name) | |
| 64 return String(); | |
| 65 | |
| 66 return newTextCodec(*this)->decode(data, length, DataEOF, stopOnError, | |
| 67 sawError); | |
| 68 } | |
| 69 | |
| 70 CString TextEncoding::encode(const String& string, | |
| 71 UnencodableHandling handling) const { | |
| 72 if (!m_name) | |
| 73 return CString(); | |
| 74 | |
| 75 if (string.isEmpty()) | |
| 76 return ""; | |
| 77 | |
| 78 std::unique_ptr<TextCodec> textCodec = newTextCodec(*this); | |
| 79 CString encodedString; | |
| 80 if (string.is8Bit()) | |
| 81 encodedString = | |
| 82 textCodec->encode(string.characters8(), string.length(), handling); | |
| 83 else | |
| 84 encodedString = | |
| 85 textCodec->encode(string.characters16(), string.length(), handling); | |
| 86 return encodedString; | |
| 87 } | |
| 88 | |
| 89 bool TextEncoding::usesVisualOrdering() const { | |
| 90 if (noExtendedTextEncodingNameUsed()) | |
| 91 return false; | |
| 92 | |
| 93 static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8"); | |
| 94 return m_name == a; | |
| 95 } | |
| 96 | |
| 97 bool TextEncoding::isNonByteBasedEncoding() const { | |
| 98 if (noExtendedTextEncodingNameUsed()) { | |
| 99 return *this == UTF16LittleEndianEncoding() || | |
| 100 *this == UTF16BigEndianEncoding(); | |
| 101 } | |
| 102 | |
| 103 return *this == UTF16LittleEndianEncoding() || | |
| 104 *this == UTF16BigEndianEncoding() || *this == UTF32Encoding() || | |
| 105 *this == UTF32BigEndianEncoding() || | |
| 106 *this == UTF32LittleEndianEncoding(); | |
| 107 } | |
| 108 | |
| 109 bool TextEncoding::isUTF7Encoding() const { | |
| 110 if (noExtendedTextEncodingNameUsed()) | |
| 111 return false; | |
| 112 | |
| 113 return *this == UTF7Encoding(); | |
| 114 } | |
| 115 | |
| 116 const TextEncoding& TextEncoding::closestByteBasedEquivalent() const { | |
| 117 if (isNonByteBasedEncoding()) | |
| 118 return UTF8Encoding(); | |
| 119 return *this; | |
| 120 } | |
| 121 | |
| 122 // HTML5 specifies that UTF-8 be used in form submission when a form is | |
| 123 // is a part of a document in UTF-16 probably because UTF-16 is not a | |
| 124 // byte-based encoding and can contain 0x00. By extension, the same | |
| 125 // should be done for UTF-32. In case of UTF-7, it is a byte-based encoding, | |
| 126 // but it's fraught with problems and we'd rather steer clear of it. | |
| 127 const TextEncoding& TextEncoding::encodingForFormSubmission() const { | |
| 128 if (isNonByteBasedEncoding() || isUTF7Encoding()) | |
| 129 return UTF8Encoding(); | |
| 130 return *this; | |
| 131 } | |
| 132 | |
| 133 const TextEncoding& ASCIIEncoding() { | |
| 134 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalASCIIEncoding, | |
| 135 new TextEncoding("ASCII")); | |
| 136 return globalASCIIEncoding; | |
| 137 } | |
| 138 | |
| 139 const TextEncoding& Latin1Encoding() { | |
| 140 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalLatin1Encoding, | |
| 141 new TextEncoding("latin1")); | |
| 142 return globalLatin1Encoding; | |
| 143 } | |
| 144 | |
| 145 const TextEncoding& UTF16BigEndianEncoding() { | |
| 146 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, | |
| 147 globalUTF16BigEndianEncoding, | |
| 148 new TextEncoding("UTF-16BE")); | |
| 149 return globalUTF16BigEndianEncoding; | |
| 150 } | |
| 151 | |
| 152 const TextEncoding& UTF16LittleEndianEncoding() { | |
| 153 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, | |
| 154 globalUTF16LittleEndianEncoding, | |
| 155 new TextEncoding("UTF-16LE")); | |
| 156 return globalUTF16LittleEndianEncoding; | |
| 157 } | |
| 158 | |
| 159 // UTF-32 is UTF-32LE with an implicit BOM. | |
| 160 const TextEncoding& UTF32Encoding() { | |
| 161 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF32Encoding, | |
| 162 new TextEncoding("UTF-32")); | |
| 163 return globalUTF32Encoding; | |
| 164 } | |
| 165 | |
| 166 const TextEncoding& UTF32BigEndianEncoding() { | |
| 167 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, | |
| 168 globalUTF32BigEndianEncoding, | |
| 169 new TextEncoding("UTF-32BE")); | |
| 170 return globalUTF32BigEndianEncoding; | |
| 171 } | |
| 172 | |
| 173 const TextEncoding& UTF32LittleEndianEncoding() { | |
| 174 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, | |
| 175 globalUTF32LittleEndianEncoding, | |
| 176 new TextEncoding("UTF-32LE")); | |
| 177 return globalUTF32LittleEndianEncoding; | |
| 178 } | |
| 179 | |
| 180 const TextEncoding& UTF8Encoding() { | |
| 181 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF8Encoding, | |
| 182 new TextEncoding("UTF-8")); | |
| 183 DCHECK(globalUTF8Encoding.isValid()); | |
| 184 return globalUTF8Encoding; | |
| 185 } | |
| 186 | |
| 187 const TextEncoding& WindowsLatin1Encoding() { | |
| 188 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, | |
| 189 globalWindowsLatin1Encoding, | |
| 190 new TextEncoding("WinLatin1")); | |
| 191 return globalWindowsLatin1Encoding; | |
| 192 } | |
| 193 | |
| 194 } // namespace WTF | |
| OLD | NEW |