Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1883)

Unified Diff: third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp

Issue 2379333003: UTF-16 Decoder: Convert unpaired surrogates to replacement characters (Closed)
Patch Set: Rebase, switch test to testharness Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/WebKit/Source/wtf/text/TextCodecUTF16.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp
diff --git a/third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp b/third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp
index 92ec825b60ec493729eeabbc77a0ed367929e97c..0e3f01783bd218e579cba7068674a120d8cbe811 100644
--- a/third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp
+++ b/third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp
@@ -76,59 +76,74 @@ String TextCodecUTF16::decode(const char* bytes,
const bool reallyFlush = flush != DoNotFlush && flush != FetchEOF;
if (!length) {
- if (!reallyFlush || !m_haveBufferedByte)
- return String();
- sawError = true;
- return String(&replacementCharacter, 1);
+ if (reallyFlush && (m_haveLeadByte || m_haveLeadSurrogate)) {
+ m_haveLeadByte = m_haveLeadSurrogate = false;
+ sawError = true;
+ return String(&replacementCharacter, 1);
+ }
+ return String();
}
- // FIXME: This should generate an error if there is an unpaired surrogate.
-
const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes);
- size_t numBytes = length + m_haveBufferedByte;
- size_t numCharsIn = numBytes / 2;
- size_t numCharsOut =
- ((numBytes & 1) && reallyFlush) ? numCharsIn + 1 : numCharsIn;
+ const size_t numBytes = length + m_haveLeadByte;
+ const bool willHaveExtraByte = numBytes & 1;
+ const size_t numCharsIn = numBytes / 2;
+ const size_t maxCharsOut = numCharsIn + (m_haveLeadSurrogate ? 1 : 0) +
+ (reallyFlush && willHaveExtraByte ? 1 : 0);
- StringBuffer<UChar> buffer(numCharsOut);
+ StringBuffer<UChar> buffer(maxCharsOut);
UChar* q = buffer.characters();
- if (m_haveBufferedByte) {
+ for (size_t i = 0; i < numCharsIn; ++i) {
UChar c;
- if (m_littleEndian)
- c = m_bufferedByte | (p[0] << 8);
- else
- c = (m_bufferedByte << 8) | p[0];
- *q++ = c;
- m_haveBufferedByte = false;
- p += 1;
- numCharsIn -= 1;
- }
-
- if (m_littleEndian) {
- for (size_t i = 0; i < numCharsIn; ++i) {
- UChar c = p[0] | (p[1] << 8);
+ if (m_haveLeadByte) {
+ c = m_littleEndian ? (m_leadByte | (p[0] << 8))
+ : ((m_leadByte << 8) | p[0]);
+ m_haveLeadByte = false;
+ ++p;
+ } else {
+ c = m_littleEndian ? (p[0] | (p[1] << 8)) : ((p[0] << 8) | p[1]);
p += 2;
- *q++ = c;
}
- } else {
- for (size_t i = 0; i < numCharsIn; ++i) {
- UChar c = (p[0] << 8) | p[1];
- p += 2;
+
+ // TODO(jsbell): If necessary for performance, m_haveLeadByte handling
+ // can be pulled out and this loop split into distinct cases for
+ // big/little endian. The logic from here to the end of the loop is
+ // constant with respect to m_haveLeadByte and m_littleEndian.
+
+ if (m_haveLeadSurrogate && U_IS_TRAIL(c)) {
+ *q++ = m_leadSurrogate;
+ m_haveLeadSurrogate = false;
*q++ = c;
+ } else {
+ if (m_haveLeadSurrogate) {
+ m_haveLeadSurrogate = false;
+ sawError = true;
+ *q++ = replacementCharacter;
+ }
+
+ if (U_IS_LEAD(c)) {
+ m_haveLeadSurrogate = true;
+ m_leadSurrogate = c;
+ } else if (U_IS_TRAIL(c)) {
+ sawError = true;
+ *q++ = replacementCharacter;
+ } else {
+ *q++ = c;
+ }
}
}
- if (numBytes & 1) {
- ASSERT(!m_haveBufferedByte);
+ DCHECK(!m_haveLeadByte);
+ if (willHaveExtraByte) {
+ m_haveLeadByte = true;
+ m_leadByte = p[0];
+ }
- if (reallyFlush) {
- sawError = true;
- *q++ = replacementCharacter;
- } else {
- m_haveBufferedByte = true;
- m_bufferedByte = p[0];
- }
+ if (reallyFlush && (m_haveLeadByte || m_haveLeadSurrogate)) {
+ m_haveLeadByte = m_haveLeadSurrogate = false;
+ sawError = true;
+ *q++ = replacementCharacter;
}
buffer.shrink(q - buffer.characters());
« no previous file with comments | « third_party/WebKit/Source/wtf/text/TextCodecUTF16.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698