Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(394)

Side by Side Diff: vm/unicode.h

Issue 11419259: Fix bug in Utf8::CodePointCount which was causing some strings with latin1 (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/runtime/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « vm/symbols.cc ('k') | vm/unicode.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #ifndef VM_UNICODE_H_ 5 #ifndef VM_UNICODE_H_
6 #define VM_UNICODE_H_ 6 #define VM_UNICODE_H_
7 7
8 #include "vm/allocation.h" 8 #include "vm/allocation.h"
9 #include "vm/globals.h" 9 #include "vm/globals.h"
10 10
(...skipping 25 matching lines...) Expand all
36 36
37 37
38 class Utf8 : AllStatic { 38 class Utf8 : AllStatic {
39 public: 39 public:
40 enum Type { 40 enum Type {
41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF]. 41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF].
42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF]. 42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF].
43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. 43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF].
44 }; 44 };
45 45
46 static intptr_t CodePointCount(const uint8_t* utf8_array, 46 // Returns the most restricted coding form in which the sequence of utf8
47 intptr_t array_len, 47 // characters in 'utf8_array' can be represented, and the number of
48 Type* type); 48 // code units needed in that form.
49 static intptr_t CodeUnitCount(const uint8_t* utf8_array,
50 intptr_t array_len,
51 Type* type);
49 52
50 // Returns true if 'utf8_array' is a valid UTF-8 string. 53 // Returns true if 'utf8_array' is a valid UTF-8 string.
51 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); 54 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len);
52 55
53 static intptr_t Length(int32_t ch); 56 static intptr_t Length(int32_t ch);
54 static intptr_t Length(const String& str); 57 static intptr_t Length(const String& str);
55 58
56 static intptr_t Encode(int32_t ch, char* dst); 59 static intptr_t Encode(int32_t ch, char* dst);
57 static intptr_t Encode(const String& src, char* dst, intptr_t len); 60 static intptr_t Encode(const String& src, char* dst, intptr_t len);
58 61
(...skipping 17 matching lines...) Expand all
76 int32_t* dst, 79 int32_t* dst,
77 intptr_t len); 80 intptr_t len);
78 81
79 private: 82 private:
80 static const int32_t kMaxOneByteChar = 0x7F; 83 static const int32_t kMaxOneByteChar = 0x7F;
81 static const int32_t kMaxTwoByteChar = 0x7FF; 84 static const int32_t kMaxTwoByteChar = 0x7FF;
82 static const int32_t kMaxThreeByteChar = 0xFFFF; 85 static const int32_t kMaxThreeByteChar = 0xFFFF;
83 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; 86 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint;
84 87
85 static bool IsTrailByte(uint8_t code_unit) { 88 static bool IsTrailByte(uint8_t code_unit) {
86 return (code_unit & 0xc0) == 0x80; 89 return (code_unit & 0xC0) == 0x80;
87 } 90 }
88 91
89 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { 92 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) {
90 return code_point < kOverlongMinimum[num_code_units]; 93 return code_point < kOverlongMinimum[num_code_units];
91 } 94 }
92 95
93 static bool IsLatin1SequenceStart(uint8_t code_unit) { 96 static bool IsLatin1SequenceStart(uint8_t code_unit) {
94 // Check is codepoint is <= U+00FF 97 // Check if utf8 sequence is the start of a codepoint <= U+00FF
95 return (code_unit <= Utf8::kMaxOneByteChar); 98 return (code_unit <= 0xC3);
96 } 99 }
97 100
98 static bool IsSupplementarySequenceStart(uint8_t code_unit) { 101 static bool IsSupplementarySequenceStart(uint8_t code_unit) {
99 // Check is codepoint is >= U+10000. 102 // Check if utf8 sequence is the start of a codepoint >= U+10000.
100 return (code_unit >= 0xF0); 103 return (code_unit >= 0xF0);
101 } 104 }
102 105
103 static const int8_t kTrailBytes[]; 106 static const int8_t kTrailBytes[];
104 static const uint32_t kMagicBits[]; 107 static const uint32_t kMagicBits[];
105 static const uint32_t kOverlongMinimum[]; 108 static const uint32_t kOverlongMinimum[];
106 }; 109 };
107 110
108 111
109 class Utf16 : AllStatic { 112 class Utf16 : AllStatic {
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
219 // Data for small code points with one mapping 222 // Data for small code points with one mapping
220 static const int16_t stage2_[]; 223 static const int16_t stage2_[];
221 224
222 // Data for large code points or code points with both mappings. 225 // Data for large code points or code points with both mappings.
223 static const int32_t stage2_exception_[][2]; 226 static const int32_t stage2_exception_[][2];
224 }; 227 };
225 228
226 } // namespace dart 229 } // namespace dart
227 230
228 #endif // VM_UNICODE_H_ 231 #endif // VM_UNICODE_H_
OLDNEW
« no previous file with comments | « vm/symbols.cc ('k') | vm/unicode.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698