src/unicode.cc - Issue 2391273002: Fix bad-char handling in utf-8 streaming streams. Also add test.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: src/unicode.cc

Issue 2391273002: Fix bad-char handling in utf-8 streaming streams. Also add test. (Closed)

Patch Set: Improve comments. Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/unicode.cc

diff --git a/src/unicode.cc b/src/unicode.cc

index 9fd39a75ebc6c506d6a42ebf99273d306f6221b6..fa4afc59965d68d3fc5cac56655ce0051e0aa0c3 100644

--- a/src/unicode.cc

+++ b/src/unicode.cc

@@ -333,25 +333,54 @@ uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) {

*buffer = 0;

return kBadChar;

}

- } else {

+ } else if (*buffer <= 0xff) {

+ // We have one unprocessed byte left, saved by an earlier call (either by

+ // this branch or by the last else case in this if statement).

+ uchar previous = *buffer;

+ *buffer = 0;

+ uchar t = ValueOfIncremental(previous, buffer);

+ if (t == kIncomplete) {

+ // If we have an incomplete character, process both the previous and the

+ // next byte at once.

+ return ValueOfIncremental(next, buffer);

+ } else {

+ // Otherwise, process the previous byte and save the next byte for next

+ // time.

+ DCHECK_EQ(0, *buffer);

+ *buffer = next;

+ return t;

+ }

+ } else if (IsContinuationCharacter(next)) {

// We're inside of a character, as described by buffer.

- if (IsContinuationCharacter(next)) {

- // How many bytes (excluding this one) do we still expect?

- uint8_t count = (*buffer >> 24) - 1;

- // Update the value.

- uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F);

- if (count) {

- *buffer = count << 24 | value;

- return kIncomplete;

- } else {

- *buffer = 0;

- return value;

- }

+ // How many bytes (excluding this one) do we still expect?

+ uint8_t count = (*buffer >> 24) - 1;

+ // Update the value.

+ uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F);

+ if (count) {

+ *buffer = count << 24 | value;

+ return kIncomplete;

} else {

- // Within a character, but not a continuation character? Bad char.

*buffer = 0;

- return kBadChar;

+ return value;

}

+ } else {

+ // Within a character, but not a continuation character? Then the

+ // character in progress is malformed: report kBadChar for it, but save

+ // the current byte so that it is processed on the next call.

+ *buffer = next;

+ return kBadChar;

+ }

+uchar Utf8::ValueOfIncrementalFinish(Utf8IncrementalBuffer* buffer) {

+ DCHECK_NOT_NULL(buffer);

+ if (*buffer == 0) {

+ return kBufferEmpty;

+ } else {

+ // Process left-over chars. An incomplete char at the end maps to kBadChar.

+ uchar t = ValueOfIncremental(0, buffer);

+ return (t == kIncomplete) ? kBadChar : t;

}

« src/parsing/scanner-character-streams.cc ('K') | « src/unicode.h ('k') | test/cctest/parsing/test-scanner-streams.cc » ('j') | no next file with comments »