vm/unicode.h - Issue 11419259: Fix bug in Utf8::CodePointCount which was causing some strings with latin1

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: vm/unicode.h

Issue 11419259: Fix bug in Utf8::CodePointCount which was causing some strings with latin1 (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/runtime/

Patch Set: Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: vm/unicode.h

===================================================================

--- vm/unicode.h (revision 15591)

+++ vm/unicode.h (working copy)

@@ -43,9 +43,12 @@

kSupplementary, // Supplementary code point [U+010000, U+10FFFF].

};

- static intptr_t CodePointCount(const uint8_t* utf8_array,

- intptr_t array_len,

- Type* type);

+ // Returns the most restricted coding form in which the sequence of utf8

+ // characters in 'utf8_array' can be represented, and the number of

+ // code units needed in that form.

+ static intptr_t CodeUnitCount(const uint8_t* utf8_array,

+ intptr_t array_len,

+ Type* type);

// Returns true if 'utf8_array' is a valid UTF-8 string.

static bool IsValid(const uint8_t* utf8_array, intptr_t array_len);

@@ -83,7 +86,7 @@

static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint;

static bool IsTrailByte(uint8_t code_unit) {

- return (code_unit & 0xc0) == 0x80;

+ return (code_unit & 0xC0) == 0x80;

}

static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) {

@@ -91,12 +94,12 @@

}

static bool IsLatin1SequenceStart(uint8_t code_unit) {

- // Check is codepoint is <= U+00FF

- return (code_unit <= Utf8::kMaxOneByteChar);

+ // Check if utf8 sequence is the start of a codepoint <= U+00FF

+ return (code_unit <= 0xC3);

}

static bool IsSupplementarySequenceStart(uint8_t code_unit) {

- // Check is codepoint is >= U+10000.

+ // Check if utf8 sequence is the start of a codepoint >= U+10000.

return (code_unit >= 0xF0);

}

« no previous file with comments | « vm/symbols.cc ('k') | vm/unicode.cc » ('j') | no next file with comments »