runtime/vm/unicode.cc - Issue 11365243: Revert OneByteString back to ISO Latin-1 instead of ASCII

Side by Side Diff: runtime/vm/unicode.cc

Issue 11365243: Revert OneByteString back to ISO Latin-1 instead of ASCII (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/

Patch Set: Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 #include "vm/unicode.h"	5 #include "vm/unicode.h"

6	6

7 #include "vm/allocation.h"	7 #include "vm/allocation.h"

8 #include "vm/globals.h"	8 #include "vm/globals.h"

9 #include "vm/object.h"	9 #include "vm/object.h"

10	10

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
51 0xFFFFFFFF,	51 0xFFFFFFFF,

52 0xFFFFFFFF	52 0xFFFFFFFF

53 };	53 };

54	54

55	55

56 static bool IsTrailByte(uint8_t code_unit) {	56 static bool IsTrailByte(uint8_t code_unit) {

57 return (code_unit & 0xc0) == 0x80;	57 return (code_unit & 0xc0) == 0x80;

58 }	58 }

59	59

60	60

61 static bool IsAsciiSequenceStart(uint8_t code_unit) {	61 static bool IsLatin1SequenceStart(uint8_t code_unit) {

62 // Check if codepoint is <= U+007F	62 // Check if codepoint is <= U+00FF

63 return (code_unit <= Utf8::kMaxOneByteChar);	63 return (code_unit <= Utf8::kMaxOneByteChar);

64 }	64 }

65	65

66	66

67 static bool IsSmpSequenceStart(uint8_t code_unit) {	67 static bool IsSmpSequenceStart(uint8_t code_unit) {

68 // Check if codepoint is >= U+10000.	68 // Check if codepoint is >= U+10000.

69 return (code_unit >= 0xF0);	69 return (code_unit >= 0xF0);

70 }	70 }

71	71

72	72

(...skipping 21 matching lines...) Expand all Loading...
94 dst[0] = (Utf8::kLeadOffset + (codepoint >> 10));	94 dst[0] = (Utf8::kLeadOffset + (codepoint >> 10));

95 dst[1] = (0xDC00 + (codepoint & 0x3FF));	95 dst[1] = (0xDC00 + (codepoint & 0x3FF));

96 }	96 }

97	97

98	98

99 // Returns the count of non-trail bytes, counting each lead byte >= 0xF0 twice.	99 // Returns the count of non-trail bytes, counting each lead byte >= 0xF0 twice.

100 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array,	100 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array,

101 intptr_t array_len,	101 intptr_t array_len,

102 Type* type) {	102 Type* type) {

103 intptr_t len = 0;	103 intptr_t len = 0;

104 Type char_type = kAscii;	104 Type char_type = kLatin1;

105 for (intptr_t i = 0; i < array_len; i++) {	105 for (intptr_t i = 0; i < array_len; i++) {

106 uint8_t code_unit = utf8_array[i];	106 uint8_t code_unit = utf8_array[i];

107 if (!IsTrailByte(code_unit)) {	107 if (!IsTrailByte(code_unit)) {

108 ++len;	108 ++len;

109 }	109 }

110 if (!IsAsciiSequenceStart(code_unit)) { // > U+007F	110 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF

111 if (IsSmpSequenceStart(code_unit)) { // >= U+10000	111 if (IsSmpSequenceStart(code_unit)) { // >= U+10000

112 char_type = kSMP;	112 char_type = kSMP;

113 ++len;	113 ++len;

114 } else if (char_type == kAscii) {	114 } else if (char_type == kLatin1) {

115 char_type = kBMP;	115 char_type = kBMP;

116 }	116 }

117 }	117 }

118 }	118 }

119 *type = char_type;	119 *type = char_type;

120 return len;	120 return len;

121 }	121 }

122	122

123	123

124 // Returns true if str is a valid NUL-terminated UTF-8 string.	124 // Returns true if str is a valid NUL-terminated UTF-8 string.

(...skipping 119 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
244 !IsSurrogate(ch))) {	244 !IsSurrogate(ch))) {

245 *dst = -1;	245 *dst = -1;

246 return 0;	246 return 0;

247 }	247 }

248 }	248 }

249 *dst = ch;	249 *dst = ch;

250 return i;	250 return i;

251 }	251 }

252	252

253	253

254 bool Utf8::DecodeToAscii(const uint8_t* utf8_array,	254 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,

255 intptr_t array_len,	255 intptr_t array_len,

256 uint8_t* dst,	256 uint8_t* dst,

257 intptr_t len) {	257 intptr_t len) {

258 if (len < array_len) {	258 intptr_t i = 0;

	259 intptr_t j = 0;

	260 intptr_t num_bytes;

	261 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

	262 int32_t ch;

	263 ASSERT(IsLatin1SequenceStart(utf8_array[i]));

	264 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

	265 if (ch == -1) {

	266 return false; // invalid input

	267 }

	268 ASSERT(ch <= 0xff);

	269 dst[j] = ch;

	270 }

	271 if ((i < array_len) && (j == len)) {

259 return false; // output overflow	272 return false; // output overflow

260 }	273 }

261 #ifdef DEBUG

262 for (intptr_t i = 0; i < array_len; i++) {

263 ASSERT(IsAsciiSequenceStart(utf8_array[i]));

264 }

265 #endif

266 memmove(dst, utf8_array, array_len);

267 return true; // success	274 return true; // success

268 }	275 }

269	276

270	277

271 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,	278 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,

272 intptr_t array_len,	279 intptr_t array_len,

273 uint16_t* dst,	280 uint16_t* dst,

274 intptr_t len) {	281 intptr_t len) {

275 intptr_t i = 0;	282 intptr_t i = 0;

276 intptr_t j = 0;	283 intptr_t j = 0;

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
311 }	318 }

312 dst[j] = ch;	319 dst[j] = ch;

313 }	320 }

314 if ((i < array_len) && (j == len)) {	321 if ((i < array_len) && (j == len)) {

315 return false; // output overflow	322 return false; // output overflow

316 }	323 }

317 return true; // success	324 return true; // success

318 }	325 }

319	326

320 } // namespace dart	327 } // namespace dart

OLD	NEW

« runtime/vm/dart_api_message.cc ('K') | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »