| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
| 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
| 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. | 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. |
| 5 * | 5 * |
| 6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
| 8 * are met: | 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 | 30 |
| 31 #include "wtf/OwnPtr.h" | 31 #include "wtf/OwnPtr.h" |
| 32 #include "wtf/StdLibExtras.h" | 32 #include "wtf/StdLibExtras.h" |
| 33 #include "wtf/Threading.h" | 33 #include "wtf/Threading.h" |
| 34 #include "wtf/text/CString.h" | 34 #include "wtf/text/CString.h" |
| 35 #include "wtf/text/TextEncodingRegistry.h" | 35 #include "wtf/text/TextEncodingRegistry.h" |
| 36 #include "wtf/text/WTFString.h" | 36 #include "wtf/text/WTFString.h" |
| 37 | 37 |
| 38 namespace WTF { | 38 namespace WTF { |
| 39 | 39 |
| 40 static const TextEncoding& UTF7Encoding() | 40 static const TextEncoding& UTF7Encoding() { |
| 41 { | 41 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF7Encoding, n
ew TextEncoding("UTF-7")); |
| 42 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF7Encoding,
new TextEncoding("UTF-7")); | 42 return globalUTF7Encoding; |
| 43 return globalUTF7Encoding; | |
| 44 } | 43 } |
| 45 | 44 |
| 46 TextEncoding::TextEncoding(const char* name) | 45 TextEncoding::TextEncoding(const char* name) |
| 47 : m_name(atomicCanonicalTextEncodingName(name)) | 46 : m_name(atomicCanonicalTextEncodingName(name)) { |
| 48 { | 47 // Aliases are valid, but not "replacement" itself. |
| 49 // Aliases are valid, but not "replacement" itself. | 48 if (m_name && isReplacementEncoding(name)) |
| 50 if (m_name && isReplacementEncoding(name)) | 49 m_name = 0; |
| 51 m_name = 0; | |
| 52 } | 50 } |
| 53 | 51 |
| 54 TextEncoding::TextEncoding(const String& name) | 52 TextEncoding::TextEncoding(const String& name) |
| 55 : m_name(atomicCanonicalTextEncodingName(name)) | 53 : m_name(atomicCanonicalTextEncodingName(name)) { |
| 56 { | 54 // Aliases are valid, but not "replacement" itself. |
| 57 // Aliases are valid, but not "replacement" itself. | 55 if (m_name && isReplacementEncoding(name)) |
| 58 if (m_name && isReplacementEncoding(name)) | 56 m_name = 0; |
| 59 m_name = 0; | |
| 60 } | 57 } |
| 61 | 58 |
| 62 String TextEncoding::decode(const char* data, size_t length, bool stopOnError, b
ool& sawError) const | 59 String TextEncoding::decode(const char* data, size_t length, bool stopOnError, b
ool& sawError) const { |
| 63 { | 60 if (!m_name) |
| 64 if (!m_name) | 61 return String(); |
| 65 return String(); | |
| 66 | 62 |
| 67 return newTextCodec(*this)->decode(data, length, DataEOF, stopOnError, sawEr
ror); | 63 return newTextCodec(*this)->decode(data, length, DataEOF, stopOnError, sawErro
r); |
| 68 } | 64 } |
| 69 | 65 |
| 70 CString TextEncoding::encode(const String& string, UnencodableHandling handling)
const | 66 CString TextEncoding::encode(const String& string, UnencodableHandling handling)
const { |
| 71 { | 67 if (!m_name) |
| 72 if (!m_name) | 68 return CString(); |
| 73 return CString(); | |
| 74 | 69 |
| 75 if (string.isEmpty()) | 70 if (string.isEmpty()) |
| 76 return ""; | 71 return ""; |
| 77 | 72 |
| 78 OwnPtr<TextCodec> textCodec = newTextCodec(*this); | 73 OwnPtr<TextCodec> textCodec = newTextCodec(*this); |
| 79 CString encodedString; | 74 CString encodedString; |
| 80 if (string.is8Bit()) | 75 if (string.is8Bit()) |
| 81 encodedString = textCodec->encode(string.characters8(), string.length(),
handling); | 76 encodedString = textCodec->encode(string.characters8(), string.length(), han
dling); |
| 82 else | 77 else |
| 83 encodedString = textCodec->encode(string.characters16(), string.length()
, handling); | 78 encodedString = textCodec->encode(string.characters16(), string.length(), ha
ndling); |
| 84 return encodedString; | 79 return encodedString; |
| 85 } | 80 } |
| 86 | 81 |
| 87 bool TextEncoding::usesVisualOrdering() const | 82 bool TextEncoding::usesVisualOrdering() const { |
| 88 { | 83 if (noExtendedTextEncodingNameUsed()) |
| 89 if (noExtendedTextEncodingNameUsed()) | 84 return false; |
| 90 return false; | |
| 91 | 85 |
| 92 static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8"); | 86 static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8"); |
| 93 return m_name == a; | 87 return m_name == a; |
| 94 } | 88 } |
| 95 | 89 |
| 96 bool TextEncoding::isNonByteBasedEncoding() const | 90 bool TextEncoding::isNonByteBasedEncoding() const { |
| 97 { | 91 if (noExtendedTextEncodingNameUsed()) { |
| 98 if (noExtendedTextEncodingNameUsed()) { | 92 return *this == UTF16LittleEndianEncoding() || *this == UTF16BigEndianEncodi
ng(); |
| 99 return *this == UTF16LittleEndianEncoding() | 93 } |
| 100 || *this == UTF16BigEndianEncoding(); | |
| 101 } | |
| 102 | 94 |
| 103 return *this == UTF16LittleEndianEncoding() | 95 return *this == UTF16LittleEndianEncoding() || *this == UTF16BigEndianEncoding
() || *this == UTF32BigEndianEncoding() || *this == UTF32LittleEndianEncoding(); |
| 104 || *this == UTF16BigEndianEncoding() | |
| 105 || *this == UTF32BigEndianEncoding() | |
| 106 || *this == UTF32LittleEndianEncoding(); | |
| 107 } | 96 } |
| 108 | 97 |
| 109 bool TextEncoding::isUTF7Encoding() const | 98 bool TextEncoding::isUTF7Encoding() const { |
| 110 { | 99 if (noExtendedTextEncodingNameUsed()) |
| 111 if (noExtendedTextEncodingNameUsed()) | 100 return false; |
| 112 return false; | |
| 113 | 101 |
| 114 return *this == UTF7Encoding(); | 102 return *this == UTF7Encoding(); |
| 115 } | 103 } |
| 116 | 104 |
| 117 const TextEncoding& TextEncoding::closestByteBasedEquivalent() const | 105 const TextEncoding& TextEncoding::closestByteBasedEquivalent() const { |
| 118 { | 106 if (isNonByteBasedEncoding()) |
| 119 if (isNonByteBasedEncoding()) | 107 return UTF8Encoding(); |
| 120 return UTF8Encoding(); | 108 return *this; |
| 121 return *this; | |
| 122 } | 109 } |
| 123 | 110 |
| 124 // HTML5 specifies that UTF-8 be used in form submission when a form is | 111 // HTML5 specifies that UTF-8 be used in form submission when a form is |
| 125 // a part of a document in UTF-16 probably because UTF-16 is not a | 112 // a part of a document in UTF-16 probably because UTF-16 is not a |
| 126 // byte-based encoding and can contain 0x00. By extension, the same | 113 // byte-based encoding and can contain 0x00. By extension, the same |
| 127 // should be done for UTF-32. In case of UTF-7, it is a byte-based encoding, | 114 // should be done for UTF-32. In case of UTF-7, it is a byte-based encoding, |
| 128 // but it's fraught with problems and we'd rather steer clear of it. | 115 // but it's fraught with problems and we'd rather steer clear of it. |
| 129 const TextEncoding& TextEncoding::encodingForFormSubmission() const | 116 const TextEncoding& TextEncoding::encodingForFormSubmission() const { |
| 130 { | 117 if (isNonByteBasedEncoding() || isUTF7Encoding()) |
| 131 if (isNonByteBasedEncoding() || isUTF7Encoding()) | 118 return UTF8Encoding(); |
| 132 return UTF8Encoding(); | 119 return *this; |
| 133 return *this; | |
| 134 } | 120 } |
| 135 | 121 |
| 136 const TextEncoding& ASCIIEncoding() | 122 const TextEncoding& ASCIIEncoding() { |
| 137 { | 123 AtomicallyInitializedStaticReference(const TextEncoding, globalASCIIEncoding,
new TextEncoding("ASCII")); |
| 138 AtomicallyInitializedStaticReference(const TextEncoding, globalASCIIEncoding
, new TextEncoding("ASCII")); | 124 return globalASCIIEncoding; |
| 139 return globalASCIIEncoding; | |
| 140 } | 125 } |
| 141 | 126 |
| 142 const TextEncoding& Latin1Encoding() | 127 const TextEncoding& Latin1Encoding() { |
| 143 { | 128 AtomicallyInitializedStaticReference(const TextEncoding, globalLatin1Encoding,
new TextEncoding("latin1")); |
| 144 AtomicallyInitializedStaticReference(const TextEncoding, globalLatin1Encodin
g, new TextEncoding("latin1")); | 129 return globalLatin1Encoding; |
| 145 return globalLatin1Encoding; | |
| 146 } | 130 } |
| 147 | 131 |
| 148 const TextEncoding& UTF16BigEndianEncoding() | 132 const TextEncoding& UTF16BigEndianEncoding() { |
| 149 { | 133 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF16BigEndianE
ncoding, new TextEncoding("UTF-16BE")); |
| 150 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF16BigEndia
nEncoding, new TextEncoding("UTF-16BE")); | 134 return globalUTF16BigEndianEncoding; |
| 151 return globalUTF16BigEndianEncoding; | |
| 152 } | 135 } |
| 153 | 136 |
| 154 const TextEncoding& UTF16LittleEndianEncoding() | 137 const TextEncoding& UTF16LittleEndianEncoding() { |
| 155 { | 138 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF16LittleEndi
anEncoding, new TextEncoding("UTF-16LE")); |
| 156 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF16LittleEn
dianEncoding, new TextEncoding("UTF-16LE")); | 139 return globalUTF16LittleEndianEncoding; |
| 157 return globalUTF16LittleEndianEncoding; | |
| 158 } | 140 } |
| 159 | 141 |
| 160 const TextEncoding& UTF32BigEndianEncoding() | 142 const TextEncoding& UTF32BigEndianEncoding() { |
| 161 { | 143 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF32BigEndianE
ncoding, new TextEncoding("UTF-32BE")); |
| 162 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF32BigEndia
nEncoding, new TextEncoding("UTF-32BE")); | 144 return globalUTF32BigEndianEncoding; |
| 163 return globalUTF32BigEndianEncoding; | |
| 164 } | 145 } |
| 165 | 146 |
| 166 const TextEncoding& UTF32LittleEndianEncoding() | 147 const TextEncoding& UTF32LittleEndianEncoding() { |
| 167 { | 148 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF32LittleEndi
anEncoding, new TextEncoding("UTF-32LE")); |
| 168 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF32LittleEn
dianEncoding, new TextEncoding("UTF-32LE")); | 149 return globalUTF32LittleEndianEncoding; |
| 169 return globalUTF32LittleEndianEncoding; | |
| 170 } | 150 } |
| 171 | 151 |
| 172 const TextEncoding& UTF8Encoding() | 152 const TextEncoding& UTF8Encoding() { |
| 173 { | 153 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF8Encoding, n
ew TextEncoding("UTF-8")); |
| 174 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF8Encoding,
new TextEncoding("UTF-8")); | 154 ASSERT(globalUTF8Encoding.isValid()); |
| 175 ASSERT(globalUTF8Encoding.isValid()); | 155 return globalUTF8Encoding; |
| 176 return globalUTF8Encoding; | |
| 177 } | 156 } |
| 178 | 157 |
| 179 const TextEncoding& WindowsLatin1Encoding() | 158 const TextEncoding& WindowsLatin1Encoding() { |
| 180 { | 159 AtomicallyInitializedStaticReference(const TextEncoding, globalWindowsLatin1En
coding, new TextEncoding("WinLatin1")); |
| 181 AtomicallyInitializedStaticReference(const TextEncoding, globalWindowsLatin1
Encoding, new TextEncoding("WinLatin1")); | 160 return globalWindowsLatin1Encoding; |
| 182 return globalWindowsLatin1Encoding; | |
| 183 } | 161 } |
| 184 | 162 |
| 185 } // namespace WTF | 163 } // namespace WTF |
| OLD | NEW |