Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1489)

Unified Diff: vm/unicode.h

Issue 11419259: Fix bug in Utf8::CodePointCount which was causing some strings with latin1 (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/runtime/
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « vm/symbols.cc ('k') | vm/unicode.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: vm/unicode.h
===================================================================
--- vm/unicode.h (revision 15591)
+++ vm/unicode.h (working copy)
@@ -43,9 +43,11 @@
kSupplementary, // Supplementary code point [U+010000, U+10FFFF].
};
- static intptr_t CodePointCount(const uint8_t* utf8_array,
- intptr_t array_len,
- Type* type);
+ // Returns the number of UTF-16 code units needed to represent the
cshapiro 2012/11/30 21:32:26 This is not strictly true, right? This returns th
siva 2012/11/30 21:47:19 Changed the comment to: Returns the most restricte
+ // sequence of utf8 characters in 'utf8_array'.
+ static intptr_t CodeUnitCount(const uint8_t* utf8_array,
+ intptr_t array_len,
+ Type* type);
// Returns true if 'utf8_array' is a valid UTF-8 string.
static bool IsValid(const uint8_t* utf8_array, intptr_t array_len);
@@ -82,22 +84,22 @@
static const int32_t kMaxThreeByteChar = 0xFFFF;
static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint;
- static bool IsTrailByte(uint8_t code_unit) {
- return (code_unit & 0xc0) == 0x80;
+ static bool IsTrailByte(uint8_t utf8_byte) {
cshapiro 2012/11/30 21:32:26 the utf-8 spec removed all mention of "byte" and r
siva 2012/11/30 21:47:19 Done.
+ return (utf8_byte & 0xC0) == 0x80;
}
static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) {
return code_point < kOverlongMinimum[num_code_units];
}
- static bool IsLatin1SequenceStart(uint8_t code_unit) {
- // Check is codepoint is <= U+00FF
- return (code_unit <= Utf8::kMaxOneByteChar);
+ static bool IsLatin1SequenceStart(uint8_t utf8_byte) {
+ // Check if utf8_byte starts a sequence encoding a code point <= U+00FF.
+ return (utf8_byte <= 0xC3);
}
- static bool IsSupplementarySequenceStart(uint8_t code_unit) {
- // Check is codepoint is >= U+10000.
- return (code_unit >= 0xF0);
+ static bool IsSupplementarySequenceStart(uint8_t utf8_byte) {
+ // Check if utf8_byte starts a sequence encoding a code point >= U+10000.
+ return (utf8_byte >= 0xF0);
}
static const int8_t kTrailBytes[];
« no previous file with comments | « vm/symbols.cc ('k') | vm/unicode.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698