Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(212)

Side by Side Diff: vm/unicode.cc

Issue 11419259: Fix bug in Utf8::CodePointCount which was causing some strings with latin1 (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/runtime/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « vm/unicode.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/unicode.h" 5 #include "vm/unicode.h"
6 6
7 #include "vm/allocation.h" 7 #include "vm/allocation.h"
8 #include "vm/globals.h" 8 #include "vm/globals.h"
9 #include "vm/object.h" 9 #include "vm/object.h"
10 10
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
46 0, // Padding. 46 0, // Padding.
47 0x0, 47 0x0,
48 0x80, 48 0x80,
49 0x800, 49 0x800,
50 0x10000, 50 0x10000,
51 0xFFFFFFFF, 51 0xFFFFFFFF,
52 0xFFFFFFFF 52 0xFFFFFFFF
53 }; 53 };
54 54
55 55
56 // Returns a count of the number of UTF-8 trail bytes. 56 // Returns the most restricted coding form in which the sequence of utf8
57 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, 57 // characters in 'utf8_array' can be represented, and the number of
58 intptr_t array_len, 58 // code units needed in that form.
59 Type* type) { 59 intptr_t Utf8::CodeUnitCount(const uint8_t* utf8_array,
60 intptr_t array_len,
61 Type* type) {
60 intptr_t len = 0; 62 intptr_t len = 0;
61 Type char_type = kLatin1; 63 Type char_type = kLatin1;
62 for (intptr_t i = 0; i < array_len; i++) { 64 for (intptr_t i = 0; i < array_len; i++) {
63 uint8_t code_unit = utf8_array[i]; 65 uint8_t code_unit = utf8_array[i];
64 if (!IsTrailByte(code_unit)) { 66 if (!IsTrailByte(code_unit)) {
65 ++len; 67 ++len;
66 } 68 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF
67 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF 69 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000
68 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000 70 char_type = kSupplementary;
69 char_type = kSupplementary; 71 ++len;
70 ++len; 72 } else if (char_type == kLatin1) {
71 } else if (char_type == kLatin1) { 73 char_type = kBMP;
72 char_type = kBMP; 74 }
73 } 75 }
74 } 76 }
75 } 77 }
76 *type = char_type; 78 *type = char_type;
77 return len; 79 return len;
78 } 80 }
79 81
80 82
81 // Returns true if str is a valid NUL-terminated UTF-8 string. 83 // Returns true if str is a valid NUL-terminated UTF-8 string.
82 bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) { 84 bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {
(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after
294 296
295 297
296 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { 298 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {
297 ASSERT(codepoint > Utf16::kMaxCodeUnit); 299 ASSERT(codepoint > Utf16::kMaxCodeUnit);
298 ASSERT(dst != NULL); 300 ASSERT(dst != NULL);
299 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); 301 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));
300 dst[1] = (0xDC00 + (codepoint & 0x3FF)); 302 dst[1] = (0xDC00 + (codepoint & 0x3FF));
301 } 303 }
302 304
303 } // namespace dart 305 } // namespace dart
OLDNEW
« no previous file with comments | « vm/unicode.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698