Index: Source/wtf/text/TextEncoding.cpp |
diff --git a/Source/wtf/text/TextEncoding.cpp b/Source/wtf/text/TextEncoding.cpp |
index 90a31d487f99ff88e57b3ff08605d7a6e8429242..b2e8807e357181590c34589fbb851aca397494b4 100644 |
--- a/Source/wtf/text/TextEncoding.cpp |
+++ b/Source/wtf/text/TextEncoding.cpp |
@@ -66,6 +66,11 @@ String TextEncoding::decode(const char* data, size_t length, bool stopOnError, b |
CString TextEncoding::encode(const String& string, UnencodableHandling handling) const |
{ |
+ return encode(string, NFCNormalization, handling); /* Two-argument overload forwards with NFCNormalization; the lines this patch removes always normalized with UNORM_NFC. */ |
+} |
+ |
+CString TextEncoding::encode(const String& string, NormalizationMode mode, UnencodableHandling handling) const |
+{ |
if (!m_name) |
return CString(); |
@@ -76,27 +81,35 @@ CString TextEncoding::encode(const String& string, UnencodableHandling handling) |
// unaffected by NFC. This is effectively the same as saying that all |
// Latin-1 text is already normalized to NFC. |
// Source: http://unicode.org/reports/tr15/ |
- if (string.is8Bit()) |
+ if ((mode == NoNormalization || mode == NFCNormalization) |
+ && string.is8Bit()) |
return newTextCodec(*this)->encode(string.characters8(), string.length(), handling); |
- // FIXME: What's the right place to do normalization? |
- // It's a little strange to do it inside the encode function. |
- // Perhaps normalization should be an explicit step done before calling encode. |
- |
const UChar* source = string.characters16(); |
size_t length = string.length(); |
Vector<UChar> normalizedCharacters; |
+ UNormalizationMode unormMode; |
+ switch (mode) { |
+ case NoNormalization: |
+ unormMode = UNORM_NONE; |
+ break; |
+ case NFCNormalization: |
+ unormMode = UNORM_NFC; |
+ break; |
+ } |
+ |
UErrorCode err = U_ZERO_ERROR; |
- if (unorm_quickCheck(source, length, UNORM_NFC, &err) != UNORM_YES) { |
- // First try using the length of the original string, since normalization to NFC rarely increases length. |
+ if (mode != NoNormalization |
+ && unorm_quickCheck(source, length, unormMode, &err) != UNORM_YES) { |
+ // First try a buffer the size of the original string, since normalization rarely increases the length; on overflow, retry below with the length ICU reports. |
normalizedCharacters.grow(length); |
- int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err); |
+ int32_t normalizedLength = unorm_normalize(source, length, unormMode, 0, normalizedCharacters.data(), length, &err); |
if (err == U_BUFFER_OVERFLOW_ERROR) { |
err = U_ZERO_ERROR; |
normalizedCharacters.resize(normalizedLength); |
- normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err); |
+ normalizedLength = unorm_normalize(source, length, unormMode, 0, normalizedCharacters.data(), normalizedLength, &err); |
} |
ASSERT(U_SUCCESS(err)); |