| Index: Source/wtf/text/TextEncoding.cpp
|
| diff --git a/Source/wtf/text/TextEncoding.cpp b/Source/wtf/text/TextEncoding.cpp
|
| index 90a31d487f99ff88e57b3ff08605d7a6e8429242..b2e8807e357181590c34589fbb851aca397494b4 100644
|
| --- a/Source/wtf/text/TextEncoding.cpp
|
| +++ b/Source/wtf/text/TextEncoding.cpp
|
| @@ -66,6 +66,11 @@ String TextEncoding::decode(const char* data, size_t length, bool stopOnError, b
|
|
|
| CString TextEncoding::encode(const String& string, UnencodableHandling handling) const

| {

| + return encode(string, NFCNormalization, handling); // Two-argument overload: forwards to the NormalizationMode-taking encode() with NFCNormalization as the mode.

| +}
|
| +
|
| +CString TextEncoding::encode(const String& string, NormalizationMode mode, UnencodableHandling handling) const
|
| +{
|
| if (!m_name)
|
| return CString();
|
|
|
| @@ -76,27 +81,35 @@ CString TextEncoding::encode(const String& string, UnencodableHandling handling)
|
| // unaffected by NFC. This is effectively the same as saying that all
|
| // Latin-1 text is already normalized to NFC.
|
| // Source: http://unicode.org/reports/tr15/
|
| - if (string.is8Bit())
|
| + if ((mode == NoNormalization || mode == NFCNormalization)
|
| + && string.is8Bit())
|
| return newTextCodec(*this)->encode(string.characters8(), string.length(), handling);
|
|
|
| - // FIXME: What's the right place to do normalization?
|
| - // It's a little strange to do it inside the encode function.
|
| - // Perhaps normalization should be an explicit step done before calling encode.
|
| -
|
| const UChar* source = string.characters16();
|
| size_t length = string.length();
|
|
|
| Vector<UChar> normalizedCharacters;
|
|
|
| + UNormalizationMode unormMode;
|
| + switch (mode) {
|
| + case NoNormalization:
|
| + unormMode = UNORM_NONE;
|
| + break;
|
| + case NFCNormalization:
|
| + unormMode = UNORM_NFC;
|
| + break;
|
| + }
|
| +
|
| UErrorCode err = U_ZERO_ERROR;
|
| - if (unorm_quickCheck(source, length, UNORM_NFC, &err) != UNORM_YES) {
|
| - // First try using the length of the original string, since normalization to NFC rarely increases length.
|
| + if (mode != NoNormalization
|
| + && unorm_quickCheck(source, length, unormMode, &err) != UNORM_YES) {
|
| + // First try using the length of the original string, since normalization might not increase the length.
|
| normalizedCharacters.grow(length);
|
| - int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err);
|
| + int32_t normalizedLength = unorm_normalize(source, length, unormMode, 0, normalizedCharacters.data(), length, &err);
|
| if (err == U_BUFFER_OVERFLOW_ERROR) {
|
| err = U_ZERO_ERROR;
|
| normalizedCharacters.resize(normalizedLength);
|
| - normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
|
| + normalizedLength = unorm_normalize(source, length, unormMode, 0, normalizedCharacters.data(), normalizedLength, &err);
|
| }
|
| ASSERT(U_SUCCESS(err));
|
|
|
|
|