Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(344)

Side by Side Diff: sdk/lib/utf/utf32.dart

Issue 11368138: Add some support for the code-point code-unit distinction. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Implemented feedback from patch set 3 Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 /** 5 /**
6 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can convert only 6 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can convert only
7 * as much of the input as needed. Determines the byte order from the BOM, 7 * as much of the input as needed. Determines the byte order from the BOM,
8 * or uses big-endian as a default. This method always strips a leading BOM. 8 * or uses big-endian as a default. This method always strips a leading BOM.
9 * Set the replacementCharacter to null to throw an ArgumentError 9 * Set the replacementCharacter to null to throw an ArgumentError
10 * rather than replace the bad value. 10 * rather than replace the bad value.
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 * bytes with a big-endian byte-order-marker. 89 * bytes with a big-endian byte-order-marker.
90 */ 90 */
91 List<int> encodeUtf32(String str) => 91 List<int> encodeUtf32(String str) =>
92 encodeUtf32be(str, true); 92 encodeUtf32be(str, true);
93 93
94 /** 94 /**
95 * Produce a list of UTF-32BE encoded bytes. By default, this method produces 95 * Produce a list of UTF-32BE encoded bytes. By default, this method produces
96 * UTF-32BE bytes with no BOM. 96 * UTF-32BE bytes with no BOM.
97 */ 97 */
98 List<int> encodeUtf32be(String str, [bool writeBOM = false]) { 98 List<int> encodeUtf32be(String str, [bool writeBOM = false]) {
99 List<int> utf32CodeUnits = stringToCodepoints(str); 99 List<int> utf32CodeUnits = str.charCodes();
100 List<int> encoding = new List<int>(4 * utf32CodeUnits.length + 100 List<int> encoding = new List<int>(4 * utf32CodeUnits.length +
101 (writeBOM ? 4 : 0)); 101 (writeBOM ? 4 : 0));
102 int i = 0; 102 int i = 0;
103 if (writeBOM) { 103 if (writeBOM) {
104 encoding[i++] = 0; 104 encoding[i++] = 0;
105 encoding[i++] = 0; 105 encoding[i++] = 0;
106 encoding[i++] = UNICODE_UTF_BOM_HI; 106 encoding[i++] = UNICODE_UTF_BOM_HI;
107 encoding[i++] = UNICODE_UTF_BOM_LO; 107 encoding[i++] = UNICODE_UTF_BOM_LO;
108 } 108 }
109 for (int unit in utf32CodeUnits) { 109 for (int unit in utf32CodeUnits) {
110 encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK; 110 encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK;
111 encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK; 111 encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK;
112 encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK; 112 encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK;
113 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; 113 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK;
114 } 114 }
115 return encoding; 115 return encoding;
116 } 116 }
117 117
118 /** 118 /**
119 * Produce a list of UTF-32LE encoded bytes. By default, this method produces 119 * Produce a list of UTF-32LE encoded bytes. By default, this method produces
120 * UTF-32LE bytes with no BOM. 120 * UTF-32LE bytes with no BOM.
121 */ 121 */
122 List<int> encodeUtf32le(String str, [bool writeBOM = false]) { 122 List<int> encodeUtf32le(String str, [bool writeBOM = false]) {
123 List<int> utf32CodeUnits = stringToCodepoints(str); 123 List<int> utf32CodeUnits = str.charCodes();
124 List<int> encoding = new List<int>(4 * utf32CodeUnits.length + 124 List<int> encoding = new List<int>(4 * utf32CodeUnits.length +
125 (writeBOM ? 4 : 0)); 125 (writeBOM ? 4 : 0));
126 int i = 0; 126 int i = 0;
127 if (writeBOM) { 127 if (writeBOM) {
128 encoding[i++] = UNICODE_UTF_BOM_LO; 128 encoding[i++] = UNICODE_UTF_BOM_LO;
129 encoding[i++] = UNICODE_UTF_BOM_HI; 129 encoding[i++] = UNICODE_UTF_BOM_HI;
130 encoding[i++] = 0; 130 encoding[i++] = 0;
131 encoding[i++] = 0; 131 encoding[i++] = 0;
132 } 132 }
133 for (int unit in utf32CodeUnits) { 133 for (int unit in utf32CodeUnits) {
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
310 value += (utf32EncodedBytesIterator.next() << 24); 310 value += (utf32EncodedBytesIterator.next() << 24);
311 return value; 311 return value;
312 } 312 }
313 } 313 }
314 314
315 bool _validCodepoint(int codepoint) { 315 bool _validCodepoint(int codepoint) {
316 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || 316 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||
317 (codepoint > UNICODE_UTF16_RESERVED_HI && 317 (codepoint > UNICODE_UTF16_RESERVED_HI &&
318 codepoint < UNICODE_VALID_RANGE_MAX); 318 codepoint < UNICODE_VALID_RANGE_MAX);
319 } 319 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698