Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(347)

Unified Diff: third_party/WebKit/Source/wtf/text/TextCodecUTF8.cpp

Issue 2498653002: Return one U+fffd for longest subpart of incomplete utf-8 character. (Closed)
Patch Set: Remove dead assignments. Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/WebKit/LayoutTests/fast/encoding/char-decoding-invalid-trail.html ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/WebKit/Source/wtf/text/TextCodecUTF8.cpp
diff --git a/third_party/WebKit/Source/wtf/text/TextCodecUTF8.cpp b/third_party/WebKit/Source/wtf/text/TextCodecUTF8.cpp
index 5ce1ca3ffaccd59e5d27c2afe028d8c1f7f85760..2d1d705535c319575adc68f55be3c60913be8c8b 100644
--- a/third_party/WebKit/Source/wtf/text/TextCodecUTF8.cpp
+++ b/third_party/WebKit/Source/wtf/text/TextCodecUTF8.cpp
@@ -36,7 +36,15 @@ namespace WTF {
using namespace WTF::Unicode;
-const int nonCharacter = -1;
+// The nonCharacter* constants signal invalid UTF-8.
+// The number in the name (the negated value) is how many input bytes were invalid.
+const int nonCharacter1 = -1;
+const int nonCharacter2 = -2;
+const int nonCharacter3 = -3;
+
+bool isNonCharacter(int character) {
+ return character >= nonCharacter3 && character <= nonCharacter1;
+}
std::unique_ptr<TextCodec> TextCodecUTF8::create(const TextEncoding&,
const void*) {
@@ -87,9 +95,9 @@ static inline int decodeNonASCIISequence(const uint8_t* sequence,
if (length == 2) {
ASSERT(sequence[0] <= 0xDF);
if (sequence[0] < 0xC2)
- return nonCharacter;
+ return nonCharacter1;
if (sequence[1] < 0x80 || sequence[1] > 0xBF)
- return nonCharacter;
+ return nonCharacter1;
return ((sequence[0] << 6) + sequence[1]) - 0x00003080;
}
if (length == 3) {
@@ -97,18 +105,18 @@ static inline int decodeNonASCIISequence(const uint8_t* sequence,
switch (sequence[0]) {
case 0xE0:
if (sequence[1] < 0xA0 || sequence[1] > 0xBF)
- return nonCharacter;
+ return nonCharacter1;
break;
case 0xED:
if (sequence[1] < 0x80 || sequence[1] > 0x9F)
- return nonCharacter;
+ return nonCharacter1;
break;
default:
if (sequence[1] < 0x80 || sequence[1] > 0xBF)
- return nonCharacter;
+ return nonCharacter1;
}
if (sequence[2] < 0x80 || sequence[2] > 0xBF)
- return nonCharacter;
+ return nonCharacter2;
return ((sequence[0] << 12) + (sequence[1] << 6) + sequence[2]) -
0x000E2080;
}
@@ -117,28 +125,28 @@ static inline int decodeNonASCIISequence(const uint8_t* sequence,
switch (sequence[0]) {
case 0xF0:
if (sequence[1] < 0x90 || sequence[1] > 0xBF)
- return nonCharacter;
+ return nonCharacter1;
break;
case 0xF4:
if (sequence[1] < 0x80 || sequence[1] > 0x8F)
- return nonCharacter;
+ return nonCharacter1;
break;
default:
if (sequence[1] < 0x80 || sequence[1] > 0xBF)
- return nonCharacter;
+ return nonCharacter1;
}
if (sequence[2] < 0x80 || sequence[2] > 0xBF)
- return nonCharacter;
+ return nonCharacter2;
if (sequence[3] < 0x80 || sequence[3] > 0xBF)
- return nonCharacter;
+ return nonCharacter3;
return ((sequence[0] << 18) + (sequence[1] << 12) + (sequence[2] << 6) +
sequence[3]) -
0x03C82080;
}
static inline UChar* appendCharacter(UChar* destination, int character) {
- ASSERT(character != nonCharacter);
- ASSERT(!U_IS_SURROGATE(character));
+ DCHECK(!isNonCharacter(character));
+ DCHECK(!U_IS_SURROGATE(character));
if (U_IS_BMP(character)) {
*destination++ = static_cast<UChar>(character);
} else {
@@ -256,7 +264,7 @@ bool TextCodecUTF8::handlePartialSequence<UChar>(UChar*& destination,
m_partialSequenceSize = count;
}
int character = decodeNonASCIISequence(m_partialSequence, count);
- if (character == nonCharacter) {
+ if (isNonCharacter(character)) {
handleError(destination, stopOnError, sawError);
if (stopOnError)
return false;
@@ -328,7 +336,7 @@ String TextCodecUTF8::decode(const char* bytes,
int count = nonASCIISequenceLength(*source);
int character;
if (count == 0) {
- character = nonCharacter;
+ character = nonCharacter1;
} else {
if (count > end - source) {
SECURITY_DCHECK(end - source <
@@ -341,7 +349,7 @@ String TextCodecUTF8::decode(const char* bytes,
}
character = decodeNonASCIISequence(source, count);
}
- if (character == nonCharacter) {
+ if (isNonCharacter(character)) {
sawError = true;
if (stopOnError)
break;
@@ -409,7 +417,7 @@ upConvertTo16Bit:
int count = nonASCIISequenceLength(*source);
int character;
if (count == 0) {
- character = nonCharacter;
+ character = nonCharacter1;
} else {
if (count > end - source) {
SECURITY_DCHECK(end - source <
@@ -422,13 +430,13 @@ upConvertTo16Bit:
}
character = decodeNonASCIISequence(source, count);
}
- if (character == nonCharacter) {
+ if (isNonCharacter(character)) {
sawError = true;
if (stopOnError)
break;
// Each error generates one replacement character and consumes 1-3 bytes.
marja 2016/11/16 09:59:14 Pls fix this comment
vogelheim 2016/11/16 10:35:23 Done.
*destination16++ = replacementCharacter;
- ++source;
+ source -= character;
continue;
}
source += count;
« no previous file with comments | « third_party/WebKit/LayoutTests/fast/encoding/char-decoding-invalid-trail.html ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698