| Index: third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp
|
| diff --git a/third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp b/third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp
|
| index 92ec825b60ec493729eeabbc77a0ed367929e97c..0e3f01783bd218e579cba7068674a120d8cbe811 100644
|
| --- a/third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp
|
| +++ b/third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp
|
| @@ -76,59 +76,74 @@ String TextCodecUTF16::decode(const char* bytes,
|
| const bool reallyFlush = flush != DoNotFlush && flush != FetchEOF;
|
|
|
| if (!length) {
|
| - if (!reallyFlush || !m_haveBufferedByte)
|
| - return String();
|
| - sawError = true;
|
| - return String(&replacementCharacter, 1);
|
| + if (reallyFlush && (m_haveLeadByte || m_haveLeadSurrogate)) {
|
| + m_haveLeadByte = m_haveLeadSurrogate = false;
|
| + sawError = true;
|
| + return String(&replacementCharacter, 1);
|
| + }
|
| + return String();
|
| }
|
|
|
| - // FIXME: This should generate an error if there is an unpaired surrogate.
|
| -
|
| const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes);
|
| - size_t numBytes = length + m_haveBufferedByte;
|
| - size_t numCharsIn = numBytes / 2;
|
| - size_t numCharsOut =
|
| - ((numBytes & 1) && reallyFlush) ? numCharsIn + 1 : numCharsIn;
|
| + const size_t numBytes = length + m_haveLeadByte;
|
| + const bool willHaveExtraByte = numBytes & 1;
|
| + const size_t numCharsIn = numBytes / 2;
|
| + const size_t maxCharsOut = numCharsIn + (m_haveLeadSurrogate ? 1 : 0) +
|
| + (reallyFlush && willHaveExtraByte ? 1 : 0);
|
|
|
| - StringBuffer<UChar> buffer(numCharsOut);
|
| + StringBuffer<UChar> buffer(maxCharsOut);
|
| UChar* q = buffer.characters();
|
|
|
| - if (m_haveBufferedByte) {
|
| + for (size_t i = 0; i < numCharsIn; ++i) {
|
| UChar c;
|
| - if (m_littleEndian)
|
| - c = m_bufferedByte | (p[0] << 8);
|
| - else
|
| - c = (m_bufferedByte << 8) | p[0];
|
| - *q++ = c;
|
| - m_haveBufferedByte = false;
|
| - p += 1;
|
| - numCharsIn -= 1;
|
| - }
|
| -
|
| - if (m_littleEndian) {
|
| - for (size_t i = 0; i < numCharsIn; ++i) {
|
| - UChar c = p[0] | (p[1] << 8);
|
| + if (m_haveLeadByte) {
|
| + c = m_littleEndian ? (m_leadByte | (p[0] << 8))
|
| + : ((m_leadByte << 8) | p[0]);
|
| + m_haveLeadByte = false;
|
| + ++p;
|
| + } else {
|
| + c = m_littleEndian ? (p[0] | (p[1] << 8)) : ((p[0] << 8) | p[1]);
|
| p += 2;
|
| - *q++ = c;
|
| }
|
| - } else {
|
| - for (size_t i = 0; i < numCharsIn; ++i) {
|
| - UChar c = (p[0] << 8) | p[1];
|
| - p += 2;
|
| +
|
| + // TODO(jsbell): If necessary for performance, m_haveLeadByte handling
|
| + // can be pulled out and this loop split into distinct cases for
|
| + // big/little endian. The logic from here to the end of the loop is
|
| + // constant with respect to m_haveLeadByte and m_littleEndian.
|
| +
|
| + if (m_haveLeadSurrogate && U_IS_TRAIL(c)) {
|
| + *q++ = m_leadSurrogate;
|
| + m_haveLeadSurrogate = false;
|
| *q++ = c;
|
| + } else {
|
| + if (m_haveLeadSurrogate) {
|
| + m_haveLeadSurrogate = false;
|
| + sawError = true;
|
| + *q++ = replacementCharacter;
|
| + }
|
| +
|
| + if (U_IS_LEAD(c)) {
|
| + m_haveLeadSurrogate = true;
|
| + m_leadSurrogate = c;
|
| + } else if (U_IS_TRAIL(c)) {
|
| + sawError = true;
|
| + *q++ = replacementCharacter;
|
| + } else {
|
| + *q++ = c;
|
| + }
|
| }
|
| }
|
|
|
| - if (numBytes & 1) {
|
| - ASSERT(!m_haveBufferedByte);
|
| + DCHECK(!m_haveLeadByte);
|
| + if (willHaveExtraByte) {
|
| + m_haveLeadByte = true;
|
| + m_leadByte = p[0];
|
| + }
|
|
|
| - if (reallyFlush) {
|
| - sawError = true;
|
| - *q++ = replacementCharacter;
|
| - } else {
|
| - m_haveBufferedByte = true;
|
| - m_bufferedByte = p[0];
|
| - }
|
| + if (reallyFlush && (m_haveLeadByte || m_haveLeadSurrogate)) {
|
| + m_haveLeadByte = m_haveLeadSurrogate = false;
|
| + sawError = true;
|
| + *q++ = replacementCharacter;
|
| }
|
|
|
| buffer.shrink(q - buffer.characters());
|
|
|