sdk/lib/utf/utf32.dart - Issue 11368138: Add some support for the code-point code-unit distinction.

Side by Side Diff: sdk/lib/utf/utf32.dart

Issue 11368138: Add some support for the code-point code-unit distinction. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Implemented feedback from patch set 3 Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 /**	5 /**

6 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert	6 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert

7 * as much of the input as needed. Determines the byte order from the BOM,	7 * as much of the input as needed. Determines the byte order from the BOM,

8 * or uses big-endian as a default. This method always strips a leading BOM.	8 * or uses big-endian as a default. This method always strips a leading BOM.

9 * Set the replacementCharacter to null to throw an ArgumentError	9 * Set the replacementCharacter to null to throw an ArgumentError

10 * rather than replace the bad value.	10 * rather than replace the bad value.

(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
89 * bytes with a big-endian byte-order-marker.	89 * bytes with a big-endian byte-order-marker.

90 */	90 */

91 List<int> encodeUtf32(String str) =>	91 List<int> encodeUtf32(String str) =>

92 encodeUtf32be(str, true);	92 encodeUtf32be(str, true);

93	93

94 /**	94 /**

95 * Produce a list of UTF-32BE encoded bytes. By default, this method produces	95 * Produce a list of UTF-32BE encoded bytes. By default, this method produces

96 * UTF-32BE bytes with no BOM.	96 * UTF-32BE bytes with no BOM.

97 */	97 */

98 List<int> encodeUtf32be(String str, [bool writeBOM = false]) {	98 List<int> encodeUtf32be(String str, [bool writeBOM = false]) {

99 List<int> utf32CodeUnits = stringToCodepoints(str);	99 List<int> utf32CodeUnits = str.charCodes();

100 List<int> encoding = new List<int>(4 * utf32CodeUnits.length +	100 List<int> encoding = new List<int>(4 * utf32CodeUnits.length +

101 (writeBOM ? 4 : 0));	101 (writeBOM ? 4 : 0));

102 int i = 0;	102 int i = 0;

103 if (writeBOM) {	103 if (writeBOM) {

104 encoding[i++] = 0;	104 encoding[i++] = 0;

105 encoding[i++] = 0;	105 encoding[i++] = 0;

106 encoding[i++] = UNICODE_UTF_BOM_HI;	106 encoding[i++] = UNICODE_UTF_BOM_HI;

107 encoding[i++] = UNICODE_UTF_BOM_LO;	107 encoding[i++] = UNICODE_UTF_BOM_LO;

108 }	108 }

109 for (int unit in utf32CodeUnits) {	109 for (int unit in utf32CodeUnits) {

110 encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK;	110 encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK;

111 encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK;	111 encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK;

112 encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK;	112 encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK;

113 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK;	113 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK;

114 }	114 }

115 return encoding;	115 return encoding;

116 }	116 }

117	117

118 /**	118 /**

119 * Produce a list of UTF-32LE encoded bytes. By default, this method produces	119 * Produce a list of UTF-32LE encoded bytes. By default, this method produces

120 * UTF-32LE bytes with no BOM.	120 * UTF-32LE bytes with no BOM.

121 */	121 */

122 List<int> encodeUtf32le(String str, [bool writeBOM = false]) {	122 List<int> encodeUtf32le(String str, [bool writeBOM = false]) {

123 List<int> utf32CodeUnits = stringToCodepoints(str);	123 List<int> utf32CodeUnits = str.charCodes();

124 List<int> encoding = new List<int>(4 * utf32CodeUnits.length +	124 List<int> encoding = new List<int>(4 * utf32CodeUnits.length +

125 (writeBOM ? 4 : 0));	125 (writeBOM ? 4 : 0));

126 int i = 0;	126 int i = 0;

127 if (writeBOM) {	127 if (writeBOM) {

128 encoding[i++] = UNICODE_UTF_BOM_LO;	128 encoding[i++] = UNICODE_UTF_BOM_LO;

129 encoding[i++] = UNICODE_UTF_BOM_HI;	129 encoding[i++] = UNICODE_UTF_BOM_HI;

130 encoding[i++] = 0;	130 encoding[i++] = 0;

131 encoding[i++] = 0;	131 encoding[i++] = 0;

132 }	132 }

133 for (int unit in utf32CodeUnits) {	133 for (int unit in utf32CodeUnits) {

(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
310 value += (utf32EncodedBytesIterator.next() << 24);	310 value += (utf32EncodedBytesIterator.next() << 24);

311 return value;	311 return value;

312 }	312 }

313 }	313 }

314	314

315 bool _validCodepoint(int codepoint) {	315 bool _validCodepoint(int codepoint) {

316 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||	316 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||

317 (codepoint > UNICODE_UTF16_RESERVED_HI &&	317 (codepoint > UNICODE_UTF16_RESERVED_HI &&

318 codepoint < UNICODE_VALID_RANGE_MAX);	318 codepoint < UNICODE_VALID_RANGE_MAX);

319 }	319 }

OLD	NEW

« runtime/vm/unicode.h ('K') | « sdk/lib/uri/encode_decode.dart ('k') | sdk/lib/utf/utf8.dart » ('j') | no next file with comments »