OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
4 * Copyright (C) 2007-2009 Torch Mobile, Inc. | 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. |
5 * | 5 * |
6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
8 * are met: | 8 * are met: |
9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
59 String TextEncoding::decode(const char* data, size_t length, bool stopOnError, b
ool& sawError) const | 59 String TextEncoding::decode(const char* data, size_t length, bool stopOnError, b
ool& sawError) const |
60 { | 60 { |
61 if (!m_name) | 61 if (!m_name) |
62 return String(); | 62 return String(); |
63 | 63 |
64 return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError
); | 64 return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError
); |
65 } | 65 } |
66 | 66 |
67 CString TextEncoding::encode(const String& string, UnencodableHandling handling)
const | 67 CString TextEncoding::encode(const String& string, UnencodableHandling handling)
const |
68 { | 68 { |
| 69 return encode(string, NFCNormalization, handling); |
| 70 } |
| 71 |
| 72 CString TextEncoding::encode(const String& string, NormalizationMode mode, Unenc
odableHandling handling) const |
| 73 { |
69 if (!m_name) | 74 if (!m_name) |
70 return CString(); | 75 return CString(); |
71 | 76 |
72 if (string.isEmpty()) | 77 if (string.isEmpty()) |
73 return ""; | 78 return ""; |
74 | 79 |
75 // Text exclusively containing Latin-1 characters (U+0000..U+00FF) is left | 80 // Text exclusively containing Latin-1 characters (U+0000..U+00FF) is left |
76 // unaffected by NFC. This is effectively the same as saying that all | 81 // unaffected by NFC. This is effectively the same as saying that all |
77 // Latin-1 text is already normalized to NFC. | 82 // Latin-1 text is already normalized to NFC. |
78 // Source: http://unicode.org/reports/tr15/ | 83 // Source: http://unicode.org/reports/tr15/ |
79 if (string.is8Bit()) | 84 if ((mode == NoNormalization || mode == NFCNormalization) |
| 85 && string.is8Bit()) |
80 return newTextCodec(*this)->encode(string.characters8(), string.length()
, handling); | 86 return newTextCodec(*this)->encode(string.characters8(), string.length()
, handling); |
81 | 87 |
82 // FIXME: What's the right place to do normalization? | |
83 // It's a little strange to do it inside the encode function. | |
84 // Perhaps normalization should be an explicit step done before calling enco
de. | |
85 | |
86 const UChar* source = string.characters16(); | 88 const UChar* source = string.characters16(); |
87 size_t length = string.length(); | 89 size_t length = string.length(); |
88 | 90 |
89 Vector<UChar> normalizedCharacters; | 91 Vector<UChar> normalizedCharacters; |
90 | 92 |
| 93 UNormalizationMode unormMode; |
| 94 switch (mode) { |
| 95 case NoNormalization: |
| 96 unormMode = UNORM_NONE; |
| 97 break; |
| 98 case NFCNormalization: |
| 99 unormMode = UNORM_NFC; |
| 100 break; |
| 101 } |
| 102 |
91 UErrorCode err = U_ZERO_ERROR; | 103 UErrorCode err = U_ZERO_ERROR; |
92 if (unorm_quickCheck(source, length, UNORM_NFC, &err) != UNORM_YES) { | 104 if (mode != NoNormalization |
93 // First try using the length of the original string, since normalizatio
n to NFC rarely increases length. | 105 && unorm_quickCheck(source, length, unormMode, &err) != UNORM_YES) { |
| 106 // First try using the length of the original string, since normalizatio
n might not increase the length. |
94 normalizedCharacters.grow(length); | 107 normalizedCharacters.grow(length); |
95 int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0,
normalizedCharacters.data(), length, &err); | 108 int32_t normalizedLength = unorm_normalize(source, length, unormMode, 0,
normalizedCharacters.data(), length, &err); |
96 if (err == U_BUFFER_OVERFLOW_ERROR) { | 109 if (err == U_BUFFER_OVERFLOW_ERROR) { |
97 err = U_ZERO_ERROR; | 110 err = U_ZERO_ERROR; |
98 normalizedCharacters.resize(normalizedLength); | 111 normalizedCharacters.resize(normalizedLength); |
99 normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, nor
malizedCharacters.data(), normalizedLength, &err); | 112 normalizedLength = unorm_normalize(source, length, unormMode, 0, nor
malizedCharacters.data(), normalizedLength, &err); |
100 } | 113 } |
101 ASSERT(U_SUCCESS(err)); | 114 ASSERT(U_SUCCESS(err)); |
102 | 115 |
103 source = normalizedCharacters.data(); | 116 source = normalizedCharacters.data(); |
104 length = normalizedLength; | 117 length = normalizedLength; |
105 } | 118 } |
106 | 119 |
107 return newTextCodec(*this)->encode(source, length, handling); | 120 return newTextCodec(*this)->encode(source, length, handling); |
108 } | 121 } |
109 | 122 |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
227 return globalUTF8Encoding; | 240 return globalUTF8Encoding; |
228 } | 241 } |
229 | 242 |
230 const TextEncoding& WindowsLatin1Encoding() | 243 const TextEncoding& WindowsLatin1Encoding() |
231 { | 244 { |
232 static TextEncoding globalWindowsLatin1Encoding("WinLatin1"); | 245 static TextEncoding globalWindowsLatin1Encoding("WinLatin1"); |
233 return globalWindowsLatin1Encoding; | 246 return globalWindowsLatin1Encoding; |
234 } | 247 } |
235 | 248 |
236 } // namespace WTF | 249 } // namespace WTF |
OLD | NEW |