vm/unicode.cc - Issue 11419259: Fix bug in Utf8::CodePointCount which was causing some strings with latin1

Side by Side Diff: vm/unicode.cc

Issue 11419259: Fix bug in Utf8::CodePointCount which was causing some strings with latin1 (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/runtime/

Patch Set: Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 #include "vm/unicode.h"	5 #include "vm/unicode.h"

6	6

7 #include "vm/allocation.h"	7 #include "vm/allocation.h"

8 #include "vm/globals.h"	8 #include "vm/globals.h"

9 #include "vm/object.h"	9 #include "vm/object.h"

10	10

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
46 0, // Padding.	46 0, // Padding.

47 0x0,	47 0x0,

48 0x80,	48 0x80,

49 0x800,	49 0x800,

50 0x10000,	50 0x10000,

51 0xFFFFFFFF,	51 0xFFFFFFFF,

52 0xFFFFFFFF	52 0xFFFFFFFF

53 };	53 };

54	54

55	55

56 // Returns a count of the number of UTF-8 trail bytes.	56 // Returns the most restricted coding form in which the sequence of utf8

57 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array,	57 // characters in 'utf8_array' can be represented, and the number of

58 intptr_t array_len,	58 // code units needed in that form.

59 Type* type) {	59 intptr_t Utf8::CodeUnitCount(const uint8_t* utf8_array,

	60 intptr_t array_len,

	61 Type* type) {

60 intptr_t len = 0;	62 intptr_t len = 0;

61 Type char_type = kLatin1;	63 Type char_type = kLatin1;

62 for (intptr_t i = 0; i < array_len; i++) {	64 for (intptr_t i = 0; i < array_len; i++) {

63 uint8_t code_unit = utf8_array[i];	65 uint8_t code_unit = utf8_array[i];

64 if (!IsTrailByte(code_unit)) {	66 if (!IsTrailByte(code_unit)) {

65 ++len;	67 ++len;

66 }	68 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF

67 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF	69 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000

68 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000	70 char_type = kSupplementary;

69 char_type = kSupplementary;	71 ++len;

70 ++len;	72 } else if (char_type == kLatin1) {

71 } else if (char_type == kLatin1) {	73 char_type = kBMP;

72 char_type = kBMP;	74 }

73 }	75 }

74 }	76 }

75 }	77 }

76 *type = char_type;	78 *type = char_type;

77 return len;	79 return len;

78 }	80 }

79	81

80	82

81 // Returns true if str is a valid NUL-terminated UTF-8 string.	83 // Returns true if str is a valid NUL-terminated UTF-8 string.

82 bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {	84 bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {

(...skipping 211 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
294	296

295	297

296 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {	298 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {

297 ASSERT(codepoint > Utf16::kMaxCodeUnit);	299 ASSERT(codepoint > Utf16::kMaxCodeUnit);

298 ASSERT(dst != NULL);	300 ASSERT(dst != NULL);

299 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));	301 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));

300 dst[1] = (0xDC00 + (codepoint & 0x3FF));	302 dst[1] = (0xDC00 + (codepoint & 0x3FF));

301 }	303 }

302	304

303 } // namespace dart	305 } // namespace dart

OLD	NEW

« no previous file with comments | « vm/unicode.h ('k') | no next file » | no next file with comments »