Chromium Code Reviews| Index: src/unicode.cc |
| diff --git a/src/unicode.cc b/src/unicode.cc |
| index a63174c47d60eb90a5db28b33fe44b2b5bbdcbc9..83d4a0361847847c4ada44277276190cecd820a3 100644 |
| --- a/src/unicode.cc |
| +++ b/src/unicode.cc |
| @@ -235,35 +235,31 @@ uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) { |
| while (count < max_count && IsContinuationCharacter(str[count])) { |
| count++; |
| } |
| + *cursor += count; |
|
jbroman
2016/11/22 00:25:04
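I lifted this here, because in both cases it was incremented by the same amount (on the success path count == length, so `*cursor += length` and `*cursor += count` are equivalent).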
|
| - // Check overly long sequences & other conditions. Use length as error |
| - // indicator. |
| + // There must be enough continuation characters. |
| + if (count != length) return kBadChar; |
| + |
| + // Check overly long sequences & other conditions. |
| if (length == 3) { |
| if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) { |
| // Overlong three-byte sequence? |
| - length = 0; |
| + return kBadChar; |
| } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) { |
| // High and low surrogate halves? |
| - length = 0; |
| + return kBadChar; |
| } |
| } else if (length == 4) { |
| if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) { |
| // Overlong four-byte sequence. |
| - length = 0; |
| + return kBadChar; |
| } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) { |
| // Code points outside of the unicode range. |
| - length = 0; |
| + return kBadChar; |
| } |
| } |
| - if (count != length) { |
| - // All invalid encodings should land here. |
| - *cursor += count; |
| - return kBadChar; |
| - } |
| - |
| // All errors have been handled, so we only have to assemble the result. |
| - *cursor += length; |
| switch (length) { |
| case 1: |
| return str[0]; |