Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(530)

Side by Side Diff: vm/unicode.h

Issue 11419259: Fix bug in Utf8::CodePointCount which was causing some strings with latin1 (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/runtime/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « vm/symbols.cc ('k') | vm/unicode.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #ifndef VM_UNICODE_H_ 5 #ifndef VM_UNICODE_H_
6 #define VM_UNICODE_H_ 6 #define VM_UNICODE_H_
7 7
8 #include "vm/allocation.h" 8 #include "vm/allocation.h"
9 #include "vm/globals.h" 9 #include "vm/globals.h"
10 10
(...skipping 25 matching lines...) Expand all
36 36
37 37
38 class Utf8 : AllStatic { 38 class Utf8 : AllStatic {
39 public: 39 public:
40 enum Type { 40 enum Type {
41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF]. 41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF].
42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF]. 42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF].
43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. 43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF].
44 }; 44 };
45 45
46 static intptr_t CodePointCount(const uint8_t* utf8_array, 46 // Returns a count of the number of UTF-16 code units needed to represent the
cshapiro 2012/11/30 21:32:26 This is not strictly true, right? This returns th… [comment truncated in collapsed view]
siva 2012/11/30 21:47:19 Changed the comment to: Returns the most restricte… [comment truncated in collapsed view]
47 intptr_t array_len, 47 // sequence of utf8 characters in 'utf8_array'.
48 Type* type); 48 static intptr_t CodeUnitCount(const uint8_t* utf8_array,
49 intptr_t array_len,
50 Type* type);
49 51
50 // Returns true if 'utf8_array' is a valid UTF-8 string. 52 // Returns true if 'utf8_array' is a valid UTF-8 string.
51 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); 53 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len);
52 54
53 static intptr_t Length(int32_t ch); 55 static intptr_t Length(int32_t ch);
54 static intptr_t Length(const String& str); 56 static intptr_t Length(const String& str);
55 57
56 static intptr_t Encode(int32_t ch, char* dst); 58 static intptr_t Encode(int32_t ch, char* dst);
57 static intptr_t Encode(const String& src, char* dst, intptr_t len); 59 static intptr_t Encode(const String& src, char* dst, intptr_t len);
58 60
(...skipping 16 matching lines...) Expand all
75 static bool DecodeCStringToUTF32(const char* str, 77 static bool DecodeCStringToUTF32(const char* str,
76 int32_t* dst, 78 int32_t* dst,
77 intptr_t len); 79 intptr_t len);
78 80
79 private: 81 private:
80 static const int32_t kMaxOneByteChar = 0x7F; 82 static const int32_t kMaxOneByteChar = 0x7F;
81 static const int32_t kMaxTwoByteChar = 0x7FF; 83 static const int32_t kMaxTwoByteChar = 0x7FF;
82 static const int32_t kMaxThreeByteChar = 0xFFFF; 84 static const int32_t kMaxThreeByteChar = 0xFFFF;
83 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; 85 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint;
84 86
85 static bool IsTrailByte(uint8_t code_unit) { 87 static bool IsTrailByte(uint8_t utf8_byte) {
cshapiro 2012/11/30 21:32:26 the utf-8 spec removed all mention of "byte" and r… [comment truncated in collapsed view]
siva 2012/11/30 21:47:19 Done.
86 return (code_unit & 0xc0) == 0x80; 88 return (utf8_byte & 0xC0) == 0x80;
87 } 89 }
88 90
89 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { 91 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) {
90 return code_point < kOverlongMinimum[num_code_units]; 92 return code_point < kOverlongMinimum[num_code_units];
91 } 93 }
92 94
93 static bool IsLatin1SequenceStart(uint8_t code_unit) { 95 static bool IsLatin1SequenceStart(uint8_t utf8_byte) {
94 // Check is codepoint is <= U+00FF 96 // Check if utf8 sequence is start of a codepoint <= U+00FF
95 return (code_unit <= Utf8::kMaxOneByteChar); 97 return (utf8_byte <= 0xC3);
96 } 98 }
97 99
98 static bool IsSupplementarySequenceStart(uint8_t code_unit) { 100 static bool IsSupplementarySequenceStart(uint8_t utf8_byte) {
99 // Check is codepoint is >= U+10000. 101 // Check if utf8 sequence is start of a codepoint >= U+10000.
100 return (code_unit >= 0xF0); 102 return (utf8_byte >= 0xF0);
101 } 103 }
102 104
103 static const int8_t kTrailBytes[]; 105 static const int8_t kTrailBytes[];
104 static const uint32_t kMagicBits[]; 106 static const uint32_t kMagicBits[];
105 static const uint32_t kOverlongMinimum[]; 107 static const uint32_t kOverlongMinimum[];
106 }; 108 };
107 109
108 110
109 class Utf16 : AllStatic { 111 class Utf16 : AllStatic {
110 public: 112 public:
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
219 // Data for small code points with one mapping 221 // Data for small code points with one mapping
220 static const int16_t stage2_[]; 222 static const int16_t stage2_[];
221 223
222 // Data for large code points or code points with both mappings. 224 // Data for large code points or code points with both mappings.
223 static const int32_t stage2_exception_[][2]; 225 static const int32_t stage2_exception_[][2];
224 }; 226 };
225 227
226 } // namespace dart 228 } // namespace dart
227 229
228 #endif // VM_UNICODE_H_ 230 #endif // VM_UNICODE_H_
OLD | NEW
« no previous file with comments | « vm/symbols.cc ('k') | vm/unicode.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698