| Index: src/unicode.cc
|
| diff --git a/src/unicode.cc b/src/unicode.cc
|
| index 9fd39a75ebc6c506d6a42ebf99273d306f6221b6..fa4afc59965d68d3fc5cac56655ce0051e0aa0c3 100644
|
| --- a/src/unicode.cc
|
| +++ b/src/unicode.cc
|
| @@ -333,25 +333,54 @@ uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) {
|
| *buffer = 0;
|
| return kBadChar;
|
| }
|
| - } else {
|
| + } else if (*buffer <= 0xff) {
|
| + // We have one unprocessed byte left (from the last else case in this if
|
| + // statement).
|
| + uchar previous = *buffer;
|
| + *buffer = 0;
|
| + uchar t = ValueOfIncremental(previous, buffer);
|
| + if (t == kIncomplete) {
|
| + // If we have an incomplete character, process both the previous and the
|
| + // next byte at once.
|
| + return ValueOfIncremental(next, buffer);
|
| + } else {
|
| + // Otherwise, process the previous byte and save the next byte for next
|
| + // time.
|
| + DCHECK_EQ(0, *buffer);
|
| + *buffer = next;
|
| + return t;
|
| + }
|
| + } else if (IsContinuationCharacter(next)) {
|
| // We're inside of a character, as described by buffer.
|
| - if (IsContinuationCharacter(next)) {
|
| - // How many bytes (excluding this one) do we still expect?
|
| - uint8_t count = (*buffer >> 24) - 1;
|
| - // Update the value.
|
| - uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F);
|
| - if (count) {
|
| - *buffer = count << 24 | value;
|
| - return kIncomplete;
|
| - } else {
|
| - *buffer = 0;
|
| - return value;
|
| - }
|
| +
|
| + // How many bytes (excluding this one) do we still expect?
|
| + uint8_t count = (*buffer >> 24) - 1;
|
| + // Update the value.
|
| + uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F);
|
| + if (count) {
|
| + *buffer = count << 24 | value;
|
| + return kIncomplete;
|
| } else {
|
| - // Within a character, but not a continuation character? Bad char.
|
| *buffer = 0;
|
| - return kBadChar;
|
| + return value;
|
| }
|
| + } else {
|
| + // Within a character, but not a continuation character? Then the
|
| + // previous char was a bad char. But we need to save the current
|
| + // one.
|
| + *buffer = next;
|
| + return kBadChar;
|
| + }
|
| +}
|
| +
|
| +uchar Utf8::ValueOfIncrementalFinish(Utf8IncrementalBuffer* buffer) {
|
| + DCHECK_NOT_NULL(buffer);
|
| + if (*buffer == 0) {
|
| + return kBufferEmpty;  // *buffer holds no pending state; nothing to flush.
|
| + } else {
|
| + // Process the left-over state in *buffer by feeding one extra 0 byte.
|
| + uchar t = ValueOfIncremental(0, buffer);
|
| + return (t == kIncomplete) ? kBadChar : t;  // Incomplete at the end: bad char.
|
| }
|
| }
|
|
|
|
|