Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(393)

Side by Side Diff: pkg/utf/lib/utf16.dart

Issue 68563004: Move unicode tests to utf package. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Simplify test. Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of utf; 5 part of utf;
6 6
7 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501).
floitsch 2013/11/18 17:08:17 Whole section copied verbatim.
Lasse Reichstein Nielsen 2013/11/19 07:42:38 I don't think I want to add anything new to the ut
floitsch 2013/11/19 10:40:32 I don't agree. The utf-package contains much more
Lasse Reichstein Nielsen 2013/11/19 12:25:43 From the same package - in that case, LGTM.
8 /**
9 * Provide a list of Unicode codepoints for a given string.
10 */
11 List<int> stringToCodepoints(String str) {
12 // Note: str.codeUnits gives us 16-bit code units on all Dart implementations.
13 // So we need to convert.
14 return _utf16CodeUnitsToCodepoints(str.codeUnits);
Lasse Reichstein Nielsen 2013/11/19 07:42:38 If we keep it (and I don't think we should - if it
floitsch 2013/11/19 10:40:32 This was code that already existed in the package.
15 }
16
17 /**
18 * Generate a string from the provided Unicode codepoints.
19 *
20 * *Deprecated* Use [String.fromCharCodes] instead.
Lasse Reichstein Nielsen 2013/11/19 07:42:38 Ditto - remove this. Definitely remove the "Deprec
floitsch 2013/11/19 10:40:32 Not in this CL.
21 */
22 String codepointsToString(List<int> codepoints) {
23 return new String.fromCharCodes(codepoints);
24 }
25
26 /**
27 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units.
28 * The parameters can override the default Unicode replacement character. Set
29 * the [replacementCodepoint] to null to throw an ArgumentError
30 * rather than replace the bad value.
31 */
32 class Utf16CodeUnitDecoder implements Iterator<int> {
Lasse Reichstein Nielsen 2013/11/19 07:42:38 Do we have a way to use a Converter to go from inp
floitsch 2013/11/19 10:40:32 Again, this is code that already existed. Not chan
33 final _ListRangeIterator utf16CodeUnitIterator;
34 final int replacementCodepoint;
35 int _current = null;
36
37 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
38 int this.replacementCodepoint =
39 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
40 utf16CodeUnitIterator =
41 (new _ListRange(utf16CodeUnits, offset, length)).iterator;
42
43 Utf16CodeUnitDecoder.fromListRangeIterator(
44 _ListRangeIterator this.utf16CodeUnitIterator,
45 int this.replacementCodepoint);
46
47 Iterator<int> get iterator => this;
48
49 int get current => _current;
50
51 bool moveNext() {
52 _current = null;
53 if (!utf16CodeUnitIterator.moveNext()) return false;
54
55 int value = utf16CodeUnitIterator.current;
56 if (value < 0) {
57 if (replacementCodepoint != null) {
58 _current = replacementCodepoint;
59 } else {
60 throw new ArgumentError(
61 "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
62 }
63 } else if (value < UNICODE_UTF16_RESERVED_LO ||
64 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
65 // transfer directly
66 _current = value;
67 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
68 utf16CodeUnitIterator.moveNext()) {
69 // merge surrogate pair
70 int nextValue = utf16CodeUnitIterator.current;
71 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
72 nextValue <= UNICODE_UTF16_RESERVED_HI) {
73 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10;
74 value += UNICODE_UTF16_OFFSET +
75 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE);
76 _current = value;
77 } else {
78 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE &&
79 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) {
80 utf16CodeUnitIterator.backup();
81 }
82 if (replacementCodepoint != null) {
83 _current = replacementCodepoint;
84 } else {
85 throw new ArgumentError(
86 "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
87 }
88 }
89 } else if (replacementCodepoint != null) {
90 _current = replacementCodepoint;
91 } else {
92 throw new ArgumentError(
93 "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
94 }
95 return true;
96 }
97 }
98
99 /**
100 * Encodes code points as UTF-16 code units.
101 */
102 List<int> _codepointsToUtf16CodeUnits(
Lasse Reichstein Nielsen 2013/11/19 07:42:38 If this isn't used, remove it. If it is, consider
floitsch 2013/11/19 10:40:32 ditto.
103 List<int> codepoints,
104 [int offset = 0,
105 int length,
106 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
107
108 _ListRange listRange = new _ListRange(codepoints, offset, length);
109 int encodedLength = 0;
110 for (int value in listRange) {
111 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
112 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
113 encodedLength++;
114 } else if (value > UNICODE_PLANE_ONE_MAX &&
115 value <= UNICODE_VALID_RANGE_MAX) {
116 encodedLength += 2;
117 } else {
118 encodedLength++;
119 }
120 }
121
122 List<int> codeUnitsBuffer = new List<int>(encodedLength);
123 int j = 0;
124 for (int value in listRange) {
125 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
126 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
127 codeUnitsBuffer[j++] = value;
128 } else if (value > UNICODE_PLANE_ONE_MAX &&
129 value <= UNICODE_VALID_RANGE_MAX) {
130 int base = value - UNICODE_UTF16_OFFSET;
131 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE +
132 ((base & UNICODE_UTF16_HI_MASK) >> 10);
133 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE +
134 (base & UNICODE_UTF16_LO_MASK);
135 } else if (replacementCodepoint != null) {
136 codeUnitsBuffer[j++] = replacementCodepoint;
137 } else {
138 throw new ArgumentError("Invalid encoding");
139 }
140 }
141 return codeUnitsBuffer;
142 }
143
144 /**
145 * Decodes the UTF-16 code units to codepoints.
146 */
147 List<int> _utf16CodeUnitsToCodepoints(
Lasse Reichstein Nielsen 2013/11/19 07:42:38 If not used, remove. If used, consider rewriting a
floitsch 2013/11/19 10:40:32 ditto.
148 List<int> utf16CodeUnits, [int offset = 0, int length,
149 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
150 _ListRangeIterator source =
151 (new _ListRange(utf16CodeUnits, offset, length)).iterator;
152 Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder
153 .fromListRangeIterator(source, replacementCodepoint);
154 List<int> codepoints = new List<int>(source.remaining);
155 int i = 0;
156 while (decoder.moveNext()) {
157 codepoints[i++] = decoder.current;
158 }
159 if (i == codepoints.length) {
160 return codepoints;
161 } else {
162 List<int> codepointTrunc = new List<int>(i);
163 codepointTrunc.setRange(0, i, codepoints);
164 return codepointTrunc;
165 }
166 }
167
7 /** 168 /**
8 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert 169 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert
9 * as much of the input as needed. Determines the byte order from the BOM, 170 * as much of the input as needed. Determines the byte order from the BOM,
10 * or uses big-endian as a default. This method always strips a leading BOM. 171 * or uses big-endian as a default. This method always strips a leading BOM.
11 * Set the [replacementCodepoint] to null to throw an ArgumentError 172 * Set the [replacementCodepoint] to null to throw an ArgumentError
12 * rather than replace the bad value. The default value for 173 * rather than replace the bad value. The default value for
13 * [replacementCodepoint] is U+FFFD. 174 * [replacementCodepoint] is U+FFFD.
14 */ 175 */
15 IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0, 176 IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0,
16 int length, int replacementCodepoint = 177 int length, int replacementCodepoint =
(...skipping 232 matching lines...) Expand 10 before | Expand all | Expand 10 after
249 List<int> truncCodeunits = new List<int>(i); 410 List<int> truncCodeunits = new List<int>(i);
250 truncCodeunits.setRange(0, i, codeunits); 411 truncCodeunits.setRange(0, i, codeunits);
251 return truncCodeunits; 412 return truncCodeunits;
252 } 413 }
253 } 414 }
254 415
255 int get current => _current; 416 int get current => _current;
256 417
257 bool moveNext() { 418 bool moveNext() {
258 _current = null; 419 _current = null;
259 if (utf16EncodedBytesIterator.remaining < 2) { 420 int remaining = utf16EncodedBytesIterator.remaining;
421 if (remaining == 0) {
422 _current = null;
423 return false;
424 }
425 if (remaining == 1) {
260 utf16EncodedBytesIterator.moveNext(); 426 utf16EncodedBytesIterator.moveNext();
261 if (replacementCodepoint != null) { 427 if (replacementCodepoint != null) {
262 _current = replacementCodepoint; 428 _current = replacementCodepoint;
263 return true; 429 return true;
264 } else { 430 } else {
265 throw new ArgumentError( 431 throw new ArgumentError(
266 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}"); 432 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}");
267 } 433 }
268 } else {
269 _current = decode();
270 return true;
271 } 434 }
435 _current = decode();
436 return true;
272 } 437 }
273 438
274 int get position => utf16EncodedBytesIterator.position ~/ 2; 439 int get position => utf16EncodedBytesIterator.position ~/ 2;
275 440
276 void backup([int by = 1]) { 441 void backup([int by = 1]) {
277 utf16EncodedBytesIterator.backup(2 * by); 442 utf16EncodedBytesIterator.backup(2 * by);
278 } 443 }
279 444
280 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2; 445 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2;
281 446
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
328 } 493 }
329 494
330 int decode() { 495 int decode() {
331 utf16EncodedBytesIterator.moveNext(); 496 utf16EncodedBytesIterator.moveNext();
332 int lo = utf16EncodedBytesIterator.current; 497 int lo = utf16EncodedBytesIterator.current;
333 utf16EncodedBytesIterator.moveNext(); 498 utf16EncodedBytesIterator.moveNext();
334 int hi = utf16EncodedBytesIterator.current; 499 int hi = utf16EncodedBytesIterator.current;
335 return (hi << 8) + lo; 500 return (hi << 8) + lo;
336 } 501 }
337 } 502 }
OLDNEW
« no previous file with comments | « pkg/utf/lib/utf.dart ('k') | pkg/utf/lib/utf32.dart » ('j') | pkg/utf/test/utf16_test.dart » ('J')

Powered by Google App Engine
This is Rietveld 408576698