| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
| 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
| 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. | 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. |
| 5 * | 5 * |
| 6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
| 8 * are met: | 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
| (...skipping 18 matching lines...) Expand all Loading... |
| 29 | 29 |
| 30 #include "wtf/OwnPtr.h" | 30 #include "wtf/OwnPtr.h" |
| 31 #include "wtf/StdLibExtras.h" | 31 #include "wtf/StdLibExtras.h" |
| 32 #include "wtf/Threading.h" | 32 #include "wtf/Threading.h" |
| 33 #include "wtf/text/CString.h" | 33 #include "wtf/text/CString.h" |
| 34 #include "wtf/text/TextEncodingRegistry.h" | 34 #include "wtf/text/TextEncodingRegistry.h" |
| 35 #include "wtf/text/WTFString.h" | 35 #include "wtf/text/WTFString.h" |
| 36 | 36 |
| 37 namespace WTF { | 37 namespace WTF { |
| 38 | 38 |
| 39 static const TextEncoding& UTF7Encoding() | 39 static const TextEncoding& UTF7Encoding() { |
| 40 { | 40 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF7Encoding, |
| 41 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF7Encoding, new
TextEncoding("UTF-7")); | 41 new TextEncoding("UTF-7")); |
| 42 return globalUTF7Encoding; | 42 return globalUTF7Encoding; |
| 43 } | 43 } |
| 44 | 44 |
| 45 TextEncoding::TextEncoding(const char* name) | 45 TextEncoding::TextEncoding(const char* name) |
| 46 : m_name(atomicCanonicalTextEncodingName(name)) | 46 : m_name(atomicCanonicalTextEncodingName(name)) { |
| 47 { | 47 // Aliases are valid, but not "replacement" itself. |
| 48 // Aliases are valid, but not "replacement" itself. | 48 if (m_name && isReplacementEncoding(name)) |
| 49 if (m_name && isReplacementEncoding(name)) | 49 m_name = 0; |
| 50 m_name = 0; | |
| 51 } | 50 } |
| 52 | 51 |
| 53 TextEncoding::TextEncoding(const String& name) | 52 TextEncoding::TextEncoding(const String& name) |
| 54 : m_name(atomicCanonicalTextEncodingName(name)) | 53 : m_name(atomicCanonicalTextEncodingName(name)) { |
| 55 { | 54 // Aliases are valid, but not "replacement" itself. |
| 56 // Aliases are valid, but not "replacement" itself. | 55 if (m_name && isReplacementEncoding(name)) |
| 57 if (m_name && isReplacementEncoding(name)) | 56 m_name = 0; |
| 58 m_name = 0; | |
| 59 } | 57 } |
| 60 | 58 |
| 61 String TextEncoding::decode(const char* data, size_t length, bool stopOnError, b
ool& sawError) const | 59 String TextEncoding::decode(const char* data, |
| 62 { | 60 size_t length, |
| 63 if (!m_name) | 61 bool stopOnError, |
| 64 return String(); | 62 bool& sawError) const { |
| 63 if (!m_name) |
| 64 return String(); |
| 65 | 65 |
| 66 return newTextCodec(*this)->decode(data, length, DataEOF, stopOnError, sawEr
ror); | 66 return newTextCodec(*this)->decode(data, length, DataEOF, stopOnError, |
| 67 sawError); |
| 67 } | 68 } |
| 68 | 69 |
| 69 CString TextEncoding::encode(const String& string, UnencodableHandling handling)
const | 70 CString TextEncoding::encode(const String& string, |
| 70 { | 71 UnencodableHandling handling) const { |
| 71 if (!m_name) | 72 if (!m_name) |
| 72 return CString(); | 73 return CString(); |
| 73 | 74 |
| 74 if (string.isEmpty()) | 75 if (string.isEmpty()) |
| 75 return ""; | 76 return ""; |
| 76 | 77 |
| 77 OwnPtr<TextCodec> textCodec = newTextCodec(*this); | 78 OwnPtr<TextCodec> textCodec = newTextCodec(*this); |
| 78 CString encodedString; | 79 CString encodedString; |
| 79 if (string.is8Bit()) | 80 if (string.is8Bit()) |
| 80 encodedString = textCodec->encode(string.characters8(), string.length(),
handling); | 81 encodedString = |
| 81 else | 82 textCodec->encode(string.characters8(), string.length(), handling); |
| 82 encodedString = textCodec->encode(string.characters16(), string.length()
, handling); | 83 else |
| 83 return encodedString; | 84 encodedString = |
| 85 textCodec->encode(string.characters16(), string.length(), handling); |
| 86 return encodedString; |
| 84 } | 87 } |
| 85 | 88 |
| 86 bool TextEncoding::usesVisualOrdering() const | 89 bool TextEncoding::usesVisualOrdering() const { |
| 87 { | 90 if (noExtendedTextEncodingNameUsed()) |
| 88 if (noExtendedTextEncodingNameUsed()) | 91 return false; |
| 89 return false; | |
| 90 | 92 |
| 91 static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8"); | 93 static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8"); |
| 92 return m_name == a; | 94 return m_name == a; |
| 93 } | 95 } |
| 94 | 96 |
| 95 bool TextEncoding::isNonByteBasedEncoding() const | 97 bool TextEncoding::isNonByteBasedEncoding() const { |
| 96 { | 98 if (noExtendedTextEncodingNameUsed()) { |
| 97 if (noExtendedTextEncodingNameUsed()) { | 99 return *this == UTF16LittleEndianEncoding() || |
| 98 return *this == UTF16LittleEndianEncoding() | 100 *this == UTF16BigEndianEncoding(); |
| 99 || *this == UTF16BigEndianEncoding(); | 101 } |
| 100 } | |
| 101 | 102 |
| 102 return *this == UTF16LittleEndianEncoding() | 103 return *this == UTF16LittleEndianEncoding() || |
| 103 || *this == UTF16BigEndianEncoding() | 104 *this == UTF16BigEndianEncoding() || |
| 104 || *this == UTF32BigEndianEncoding() | 105 *this == UTF32BigEndianEncoding() || |
| 105 || *this == UTF32LittleEndianEncoding(); | 106 *this == UTF32LittleEndianEncoding(); |
| 106 } | 107 } |
| 107 | 108 |
| 108 bool TextEncoding::isUTF7Encoding() const | 109 bool TextEncoding::isUTF7Encoding() const { |
| 109 { | 110 if (noExtendedTextEncodingNameUsed()) |
| 110 if (noExtendedTextEncodingNameUsed()) | 111 return false; |
| 111 return false; | |
| 112 | 112 |
| 113 return *this == UTF7Encoding(); | 113 return *this == UTF7Encoding(); |
| 114 } | 114 } |
| 115 | 115 |
| 116 const TextEncoding& TextEncoding::closestByteBasedEquivalent() const | 116 const TextEncoding& TextEncoding::closestByteBasedEquivalent() const { |
| 117 { | 117 if (isNonByteBasedEncoding()) |
| 118 if (isNonByteBasedEncoding()) | 118 return UTF8Encoding(); |
| 119 return UTF8Encoding(); | 119 return *this; |
| 120 return *this; | |
| 121 } | 120 } |
| 122 | 121 |
| 123 // HTML5 specifies that UTF-8 be used in form submission when a form is | 122 // HTML5 specifies that UTF-8 be used in form submission when a form is |
| 124 // is a part of a document in UTF-16 probably because UTF-16 is not a | 123 // is a part of a document in UTF-16 probably because UTF-16 is not a |
| 125 // byte-based encoding and can contain 0x00. By extension, the same | 124 // byte-based encoding and can contain 0x00. By extension, the same |
| 126 // should be done for UTF-32. In case of UTF-7, it is a byte-based encoding, | 125 // should be done for UTF-32. In case of UTF-7, it is a byte-based encoding, |
| 127 // but it's fraught with problems and we'd rather steer clear of it. | 126 // but it's fraught with problems and we'd rather steer clear of it. |
| 128 const TextEncoding& TextEncoding::encodingForFormSubmission() const | 127 const TextEncoding& TextEncoding::encodingForFormSubmission() const { |
| 129 { | 128 if (isNonByteBasedEncoding() || isUTF7Encoding()) |
| 130 if (isNonByteBasedEncoding() || isUTF7Encoding()) | 129 return UTF8Encoding(); |
| 131 return UTF8Encoding(); | 130 return *this; |
| 132 return *this; | |
| 133 } | 131 } |
| 134 | 132 |
| 135 const TextEncoding& ASCIIEncoding() | 133 const TextEncoding& ASCIIEncoding() { |
| 136 { | 134 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalASCIIEncoding, |
| 137 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalASCIIEncoding, new
TextEncoding("ASCII")); | 135 new TextEncoding("ASCII")); |
| 138 return globalASCIIEncoding; | 136 return globalASCIIEncoding; |
| 139 } | 137 } |
| 140 | 138 |
| 141 const TextEncoding& Latin1Encoding() | 139 const TextEncoding& Latin1Encoding() { |
| 142 { | 140 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalLatin1Encoding, |
| 143 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalLatin1Encoding, ne
w TextEncoding("latin1")); | 141 new TextEncoding("latin1")); |
| 144 return globalLatin1Encoding; | 142 return globalLatin1Encoding; |
| 145 } | 143 } |
| 146 | 144 |
| 147 const TextEncoding& UTF16BigEndianEncoding() | 145 const TextEncoding& UTF16BigEndianEncoding() { |
| 148 { | 146 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, |
| 149 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF16BigEndianEnco
ding, new TextEncoding("UTF-16BE")); | 147 globalUTF16BigEndianEncoding, |
| 150 return globalUTF16BigEndianEncoding; | 148 new TextEncoding("UTF-16BE")); |
| 149 return globalUTF16BigEndianEncoding; |
| 151 } | 150 } |
| 152 | 151 |
| 153 const TextEncoding& UTF16LittleEndianEncoding() | 152 const TextEncoding& UTF16LittleEndianEncoding() { |
| 154 { | 153 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, |
| 155 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF16LittleEndianE
ncoding, new TextEncoding("UTF-16LE")); | 154 globalUTF16LittleEndianEncoding, |
| 156 return globalUTF16LittleEndianEncoding; | 155 new TextEncoding("UTF-16LE")); |
| 156 return globalUTF16LittleEndianEncoding; |
| 157 } | 157 } |
| 158 | 158 |
| 159 const TextEncoding& UTF32BigEndianEncoding() | 159 const TextEncoding& UTF32BigEndianEncoding() { |
| 160 { | 160 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, |
| 161 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF32BigEndianEnco
ding, new TextEncoding("UTF-32BE")); | 161 globalUTF32BigEndianEncoding, |
| 162 return globalUTF32BigEndianEncoding; | 162 new TextEncoding("UTF-32BE")); |
| 163 return globalUTF32BigEndianEncoding; |
| 163 } | 164 } |
| 164 | 165 |
| 165 const TextEncoding& UTF32LittleEndianEncoding() | 166 const TextEncoding& UTF32LittleEndianEncoding() { |
| 166 { | 167 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, |
| 167 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF32LittleEndianE
ncoding, new TextEncoding("UTF-32LE")); | 168 globalUTF32LittleEndianEncoding, |
| 168 return globalUTF32LittleEndianEncoding; | 169 new TextEncoding("UTF-32LE")); |
| 170 return globalUTF32LittleEndianEncoding; |
| 169 } | 171 } |
| 170 | 172 |
| 171 const TextEncoding& UTF8Encoding() | 173 const TextEncoding& UTF8Encoding() { |
| 172 { | 174 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF8Encoding, |
| 173 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalUTF8Encoding, new
TextEncoding("UTF-8")); | 175 new TextEncoding("UTF-8")); |
| 174 ASSERT(globalUTF8Encoding.isValid()); | 176 ASSERT(globalUTF8Encoding.isValid()); |
| 175 return globalUTF8Encoding; | 177 return globalUTF8Encoding; |
| 176 } | 178 } |
| 177 | 179 |
| 178 const TextEncoding& WindowsLatin1Encoding() | 180 const TextEncoding& WindowsLatin1Encoding() { |
| 179 { | 181 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, |
| 180 DEFINE_THREAD_SAFE_STATIC_LOCAL(const TextEncoding, globalWindowsLatin1Encod
ing, new TextEncoding("WinLatin1")); | 182 globalWindowsLatin1Encoding, |
| 181 return globalWindowsLatin1Encoding; | 183 new TextEncoding("WinLatin1")); |
| 184 return globalWindowsLatin1Encoding; |
| 182 } | 185 } |
| 183 | 186 |
| 184 } // namespace WTF | 187 } // namespace WTF |
| OLD | NEW |