Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1274)

Side by Side Diff: runtime/vm/unicode.cc

Issue 11299084: Correct a misnomer regarding supplementary code points. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: missed a few uses of smp Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/unicode.h" 5 #include "vm/unicode.h"
6 6
7 #include "vm/allocation.h" 7 #include "vm/allocation.h"
8 #include "vm/globals.h" 8 #include "vm/globals.h"
9 #include "vm/object.h" 9 #include "vm/object.h"
10 10
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
57 return (code_unit & 0xc0) == 0x80; 57 return (code_unit & 0xc0) == 0x80;
58 } 58 }
59 59
60 60
61 static bool IsLatin1SequenceStart(uint8_t code_unit) { 61 static bool IsLatin1SequenceStart(uint8_t code_unit) {
62 // Check if codepoint is <= U+00FF 62 // Check if codepoint is <= U+00FF
63 return (code_unit <= Utf8::kMaxOneByteChar); 63 return (code_unit <= Utf8::kMaxOneByteChar);
64 } 64 }
65 65
66 66
67 static bool IsSmpSequenceStart(uint8_t code_unit) { 67 static bool IsSupplementarySequenceStart(uint8_t code_unit) {
68 // Check if codepoint is >= U+10000. 68 // Check if codepoint is >= U+10000.
69 return (code_unit >= 0xF0); 69 return (code_unit >= 0xF0);
70 } 70 }
71 71
72 72
73 // Returns true if the code point value is above Plane 17. 73 // Returns true if the code point value is above Plane 17.
74 static bool IsOutOfRange(uint32_t code_point) { 74 static bool IsOutOfRange(uint32_t code_point) {
75 return (code_point > 0x10FFFF); 75 return (code_point > 0x10FFFF);
76 } 76 }
77 77
78 78
79 // Returns true if the byte sequence is ill-formed. 79 // Returns true if the byte sequence is ill-formed.
80 static bool IsNonShortestForm(uint32_t code_point, size_t num_bytes) { 80 static bool IsNonShortestForm(uint32_t code_point, size_t num_bytes) {
81 return code_point < kOverlongMinimum[num_bytes]; 81 return code_point < kOverlongMinimum[num_bytes];
82 } 82 }
83 83
84 84
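85 // Returns the number of UTF-16 code units needed for the UTF-8 input: one per non-trail byte, plus one extra for each supplementary (4-byte) sequence. 85 // Returns the number of UTF-16 code units needed for the UTF-8 input: one per non-trail byte, plus one extra for each supplementary (4-byte) sequence.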
86 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, 86 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array,
87 intptr_t array_len, 87 intptr_t array_len,
88 Type* type) { 88 Type* type) {
89 intptr_t len = 0; 89 intptr_t len = 0;
90 Type char_type = kLatin1; 90 Type char_type = kLatin1;
91 for (intptr_t i = 0; i < array_len; i++) { 91 for (intptr_t i = 0; i < array_len; i++) {
92 uint8_t code_unit = utf8_array[i]; 92 uint8_t code_unit = utf8_array[i];
93 if (!IsTrailByte(code_unit)) { 93 if (!IsTrailByte(code_unit)) {
94 ++len; 94 ++len;
95 } 95 }
96 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF 96 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF
97 if (IsSmpSequenceStart(code_unit)) { // >= U+10000 97 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000
98 char_type = kSMP; 98 char_type = kSupplementary;
99 ++len; 99 ++len;
100 } else if (char_type == kLatin1) { 100 } else if (char_type == kLatin1) {
101 char_type = kBMP; 101 char_type = kBMP;
102 } 102 }
103 } 103 }
104 } 104 }
105 *type = char_type; 105 *type = char_type;
106 return len; 106 return len;
107 } 107 }
108 108
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after
265 265
266 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, 266 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,
267 intptr_t array_len, 267 intptr_t array_len,
268 uint16_t* dst, 268 uint16_t* dst,
269 intptr_t len) { 269 intptr_t len) {
270 intptr_t i = 0; 270 intptr_t i = 0;
271 intptr_t j = 0; 271 intptr_t j = 0;
272 intptr_t num_bytes; 272 intptr_t num_bytes;
273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { 273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
274 int32_t ch; 274 int32_t ch;
275 bool is_smp = IsSmpSequenceStart(utf8_array[i]); 275 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);
276 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); 276 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
277 if (ch == -1) { 277 if (ch == -1) {
278 return false; // invalid input 278 return false; // invalid input
279 } 279 }
280 if (is_smp) { 280 if (is_supplementary) {
281 Utf16::Encode(ch, &dst[j]); 281 Utf16::Encode(ch, &dst[j]);
282 j = j + 1; 282 j = j + 1;
283 } else { 283 } else {
284 dst[j] = ch; 284 dst[j] = ch;
285 } 285 }
286 } 286 }
287 if ((i < array_len) && (j == len)) { 287 if ((i < array_len) && (j == len)) {
288 return false; // output overflow 288 return false; // output overflow
289 } 289 }
290 return true; // success 290 return true; // success
(...skipping 23 matching lines...) Expand all
314 314
315 315
316 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { 316 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {
317 ASSERT(codepoint > kMaxBmpCodepoint); 317 ASSERT(codepoint > kMaxBmpCodepoint);
318 ASSERT(dst != NULL); 318 ASSERT(dst != NULL);
319 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); 319 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));
320 dst[1] = (0xDC00 + (codepoint & 0x3FF)); 320 dst[1] = (0xDC00 + (codepoint & 0x3FF));
321 } 321 }
322 322
323 } // namespace dart 323 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698