| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
| 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
| 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. | 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. |
| 5 * | 5 * |
| 6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
| 8 * are met: | 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
| (...skipping 16 matching lines...) Expand all Loading... |
| 27 | 27 |
| 28 #include "config.h" | 28 #include "config.h" |
| 29 #include "wtf/text/TextEncoding.h" | 29 #include "wtf/text/TextEncoding.h" |
| 30 | 30 |
| 31 #include "wtf/OwnPtr.h" | 31 #include "wtf/OwnPtr.h" |
| 32 #include "wtf/StdLibExtras.h" | 32 #include "wtf/StdLibExtras.h" |
| 33 #include "wtf/Threading.h" | 33 #include "wtf/Threading.h" |
| 34 #include "wtf/text/CString.h" | 34 #include "wtf/text/CString.h" |
| 35 #include "wtf/text/TextEncodingRegistry.h" | 35 #include "wtf/text/TextEncodingRegistry.h" |
| 36 #include "wtf/text/WTFString.h" | 36 #include "wtf/text/WTFString.h" |
| 37 #include <unicode/unorm.h> | |
| 38 | 37 |
| 39 namespace WTF { | 38 namespace WTF { |
| 40 | 39 |
| 41 static const TextEncoding& UTF7Encoding() | 40 static const TextEncoding& UTF7Encoding() |
| 42 { | 41 { |
| 43 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF7Encoding,
new TextEncoding("UTF-7")); | 42 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF7Encoding,
new TextEncoding("UTF-7")); |
| 44 return globalUTF7Encoding; | 43 return globalUTF7Encoding; |
| 45 } | 44 } |
| 46 | 45 |
| 47 TextEncoding::TextEncoding(const char* name) | 46 TextEncoding::TextEncoding(const char* name) |
| (...skipping 30 matching lines...) Expand all Loading... |
| 78 | 77 |
| 79 OwnPtr<TextCodec> textCodec = newTextCodec(*this); | 78 OwnPtr<TextCodec> textCodec = newTextCodec(*this); |
| 80 CString encodedString; | 79 CString encodedString; |
| 81 if (string.is8Bit()) | 80 if (string.is8Bit()) |
| 82 encodedString = textCodec->encode(string.characters8(), string.length(),
handling); | 81 encodedString = textCodec->encode(string.characters8(), string.length(),
handling); |
| 83 else | 82 else |
| 84 encodedString = textCodec->encode(string.characters16(), string.length()
, handling); | 83 encodedString = textCodec->encode(string.characters16(), string.length()
, handling); |
| 85 return encodedString; | 84 return encodedString; |
| 86 } | 85 } |
| 87 | 86 |
| 88 CString TextEncoding::normalizeAndEncode(const String& string, UnencodableHandli
ng handling) const | |
| 89 { | |
| 90 if (!m_name) | |
| 91 return CString(); | |
| 92 | |
| 93 if (string.isEmpty()) | |
| 94 return ""; | |
| 95 | |
| 96 // Text exclusively containing Latin-1 characters (U+0000..U+00FF) is left | |
| 97 // unaffected by NFC. This is effectively the same as saying that all | |
| 98 // Latin-1 text is already normalized to NFC. | |
| 99 // Source: http://unicode.org/reports/tr15/ | |
| 100 if (string.is8Bit()) | |
| 101 return newTextCodec(*this)->encode(string.characters8(), string.length()
, handling); | |
| 102 | |
| 103 const UChar* source = string.characters16(); | |
| 104 size_t length = string.length(); | |
| 105 | |
| 106 Vector<UChar> normalizedCharacters; | |
| 107 | |
| 108 UErrorCode err = U_ZERO_ERROR; | |
| 109 if (unorm_quickCheck(source, length, UNORM_NFC, &err) != UNORM_YES) { | |
| 110 // First try using the length of the original string, since normalizatio
n to NFC rarely increases length. | |
| 111 normalizedCharacters.grow(length); | |
| 112 int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0,
normalizedCharacters.data(), length, &err); | |
| 113 if (err == U_BUFFER_OVERFLOW_ERROR) { | |
| 114 err = U_ZERO_ERROR; | |
| 115 normalizedCharacters.resize(normalizedLength); | |
| 116 normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, nor
malizedCharacters.data(), normalizedLength, &err); | |
| 117 } | |
| 118 ASSERT(U_SUCCESS(err)); | |
| 119 | |
| 120 source = normalizedCharacters.data(); | |
| 121 length = normalizedLength; | |
| 122 } | |
| 123 | |
| 124 return newTextCodec(*this)->encode(source, length, handling); | |
| 125 } | |
| 126 | |
| 127 bool TextEncoding::usesVisualOrdering() const | 87 bool TextEncoding::usesVisualOrdering() const |
| 128 { | 88 { |
| 129 if (noExtendedTextEncodingNameUsed()) | 89 if (noExtendedTextEncodingNameUsed()) |
| 130 return false; | 90 return false; |
| 131 | 91 |
| 132 static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8"); | 92 static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8"); |
| 133 return m_name == a; | 93 return m_name == a; |
| 134 } | 94 } |
| 135 | 95 |
| 136 bool TextEncoding::isNonByteBasedEncoding() const | 96 bool TextEncoding::isNonByteBasedEncoding() const |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 216 return globalUTF8Encoding; | 176 return globalUTF8Encoding; |
| 217 } | 177 } |
| 218 | 178 |
| 219 const TextEncoding& WindowsLatin1Encoding() | 179 const TextEncoding& WindowsLatin1Encoding() |
| 220 { | 180 { |
| 221 AtomicallyInitializedStaticReference(const TextEncoding, globalWindowsLatin1
Encoding, new TextEncoding("WinLatin1")); | 181 AtomicallyInitializedStaticReference(const TextEncoding, globalWindowsLatin1
Encoding, new TextEncoding("WinLatin1")); |
| 222 return globalWindowsLatin1Encoding; | 182 return globalWindowsLatin1Encoding; |
| 223 } | 183 } |
| 224 | 184 |
| 225 } // namespace WTF | 185 } // namespace WTF |
| OLD | NEW |