Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(738)

Side by Side Diff: runtime/vm/unicode.cc

Issue 11365243: Revert OneByteString back to ISO Latin-1 instead of ASCII (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/unicode.h" 5 #include "vm/unicode.h"
6 6
7 #include "vm/allocation.h" 7 #include "vm/allocation.h"
8 #include "vm/globals.h" 8 #include "vm/globals.h"
9 #include "vm/object.h" 9 #include "vm/object.h"
10 10
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
51 0xFFFFFFFF, 51 0xFFFFFFFF,
52 0xFFFFFFFF 52 0xFFFFFFFF
53 }; 53 };
54 54
55 55
56 static bool IsTrailByte(uint8_t code_unit) { 56 static bool IsTrailByte(uint8_t code_unit) {
57 return (code_unit & 0xc0) == 0x80; 57 return (code_unit & 0xc0) == 0x80;
58 } 58 }
59 59
60 60
61 static bool IsAsciiSequenceStart(uint8_t code_unit) { 61 static bool IsLatin1SequenceStart(uint8_t code_unit) {
62 // Check if codepoint is <= U+007F 62 // Check if codepoint is <= U+00FF
63 return (code_unit <= Utf8::kMaxOneByteChar); 63 return (code_unit <= Utf8::kMaxOneByteChar);
64 } 64 }
65 65
66 66
67 static bool IsSmpSequenceStart(uint8_t code_unit) { 67 static bool IsSmpSequenceStart(uint8_t code_unit) {
68 // Check if codepoint is >= U+10000. 68 // Check if codepoint is >= U+10000.
69 return (code_unit >= 0xF0); 69 return (code_unit >= 0xF0);
70 } 70 }
71 71
72 72
(...skipping 21 matching lines...) Expand all
94 dst[0] = (Utf8::kLeadOffset + (codepoint >> 10)); 94 dst[0] = (Utf8::kLeadOffset + (codepoint >> 10));
95 dst[1] = (0xDC00 + (codepoint & 0x3FF)); 95 dst[1] = (0xDC00 + (codepoint & 0x3FF));
96 } 96 }
97 97
98 98
99 // Returns a count of the non-trail UTF-8 bytes, plus one extra for each lead byte >= 0xF0. 99 // Returns a count of the non-trail UTF-8 bytes, plus one extra for each lead byte >= 0xF0.
100 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, 100 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array,
101 intptr_t array_len, 101 intptr_t array_len,
102 Type* type) { 102 Type* type) {
103 intptr_t len = 0; 103 intptr_t len = 0;
104 Type char_type = kAscii; 104 Type char_type = kLatin1;
105 for (intptr_t i = 0; i < array_len; i++) { 105 for (intptr_t i = 0; i < array_len; i++) {
106 uint8_t code_unit = utf8_array[i]; 106 uint8_t code_unit = utf8_array[i];
107 if (!IsTrailByte(code_unit)) { 107 if (!IsTrailByte(code_unit)) {
108 ++len; 108 ++len;
109 } 109 }
110 if (!IsAsciiSequenceStart(code_unit)) { // > U+007F 110 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF
111 if (IsSmpSequenceStart(code_unit)) { // >= U+10000 111 if (IsSmpSequenceStart(code_unit)) { // >= U+10000
112 char_type = kSMP; 112 char_type = kSMP;
113 ++len; 113 ++len;
114 } else if (char_type == kAscii) { 114 } else if (char_type == kLatin1) {
115 char_type = kBMP; 115 char_type = kBMP;
116 } 116 }
117 } 117 }
118 } 118 }
119 *type = char_type; 119 *type = char_type;
120 return len; 120 return len;
121 } 121 }
122 122
123 123
124 // Returns true if str is a valid NUL-terminated UTF-8 string. 124 // Returns true if str is a valid NUL-terminated UTF-8 string.
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
244 !IsSurrogate(ch))) { 244 !IsSurrogate(ch))) {
245 *dst = -1; 245 *dst = -1;
246 return 0; 246 return 0;
247 } 247 }
248 } 248 }
249 *dst = ch; 249 *dst = ch;
250 return i; 250 return i;
251 } 251 }
252 252
253 253
254 bool Utf8::DecodeToAscii(const uint8_t* utf8_array, 254 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,
255 intptr_t array_len, 255 intptr_t array_len,
256 uint8_t* dst, 256 uint8_t* dst,
257 intptr_t len) { 257 intptr_t len) {
258 if (len < array_len) { 258 intptr_t i = 0;
259 intptr_t j = 0;
260 intptr_t num_bytes;
261 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
262 int32_t ch;
263 ASSERT(IsLatin1SequenceStart(utf8_array[i]));
264 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
265 if (ch == -1) {
266 return false; // invalid input
267 }
268 ASSERT(ch <= 0xff);
269 dst[j] = ch;
270 }
271 if ((i < array_len) && (j == len)) {
259 return false; // output overflow 272 return false; // output overflow
260 } 273 }
261 #ifdef DEBUG
262 for (intptr_t i = 0; i < array_len; i++) {
263 ASSERT(IsAsciiSequenceStart(utf8_array[i]));
264 }
265 #endif
266 memmove(dst, utf8_array, array_len);
267 return true; // success 274 return true; // success
268 } 275 }
269 276
270 277
271 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, 278 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,
272 intptr_t array_len, 279 intptr_t array_len,
273 uint16_t* dst, 280 uint16_t* dst,
274 intptr_t len) { 281 intptr_t len) {
275 intptr_t i = 0; 282 intptr_t i = 0;
276 intptr_t j = 0; 283 intptr_t j = 0;
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
311 } 318 }
312 dst[j] = ch; 319 dst[j] = ch;
313 } 320 }
314 if ((i < array_len) && (j == len)) { 321 if ((i < array_len) && (j == len)) {
315 return false; // output overflow 322 return false; // output overflow
316 } 323 }
317 return true; // success 324 return true; // success
318 } 325 }
319 326
320 } // namespace dart 327 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698