Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1377)

Side by Side Diff: sdk/lib/utf/utf_core.dart

Issue 11410086: Use iterator, moveNext(), current. (Closed) Base URL: https://dart.googlecode.com/svn/experimental/lib_v2/dart
Patch Set: Address comments. Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501). 5 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501).
6 /** 6 /**
7 * Provide a list of Unicode codepoints for a given string. 7 * Provide a list of Unicode codepoints for a given string.
8 */ 8 */
9 List<int> stringToCodepoints(String str) { 9 List<int> stringToCodepoints(String str) {
10 // Note: str.charCodes gives us 16-bit code units on all Dart implementations. 10 // Note: str.charCodes gives us 16-bit code units on all Dart implementations.
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
61 const int UNICODE_UTF16_OFFSET = 0x10000; 61 const int UNICODE_UTF16_OFFSET = 0x10000;
62 const int UNICODE_UTF16_SURROGATE_UNIT_0_BASE = 0xd800; 62 const int UNICODE_UTF16_SURROGATE_UNIT_0_BASE = 0xd800;
63 const int UNICODE_UTF16_SURROGATE_UNIT_1_BASE = 0xdc00; 63 const int UNICODE_UTF16_SURROGATE_UNIT_1_BASE = 0xdc00;
64 const int UNICODE_UTF16_HI_MASK = 0xffc00; 64 const int UNICODE_UTF16_HI_MASK = 0xffc00;
65 const int UNICODE_UTF16_LO_MASK = 0x3ff; 65 const int UNICODE_UTF16_LO_MASK = 0x3ff;
66 66
67 /** 67 /**
68 * Encode code points as UTF16 code units. 68 * Encode code points as UTF16 code units.
69 */ 69 */
70 List<int> _codepointsToUtf16CodeUnits( 70 List<int> _codepointsToUtf16CodeUnits(
71 List<int> codepoints, [int offset = 0, int length, 71 List<int> codepoints,
72 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 72 [int offset = 0,
73 int length,
74 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
73 75
74 _ListRange listRange = new _ListRange(codepoints, offset, length); 76 _ListRange listRange = new _ListRange(codepoints, offset, length);
75 int encodedLength = 0; 77 int encodedLength = 0;
76 for (int value in listRange) { 78 for (int value in listRange) {
77 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) || 79 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
78 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { 80 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
79 encodedLength++; 81 encodedLength++;
80 } else if (value > UNICODE_PLANE_ONE_MAX && 82 } else if (value > UNICODE_PLANE_ONE_MAX &&
81 value <= UNICODE_VALID_RANGE_MAX) { 83 value <= UNICODE_VALID_RANGE_MAX) {
82 encodedLength += 2; 84 encodedLength += 2;
(...skipping 24 matching lines...) Expand all
107 return codeUnitsBuffer; 109 return codeUnitsBuffer;
108 } 110 }
109 111
110 /** 112 /**
111 * Decodes the utf16 codeunits to codepoints. 113 * Decodes the utf16 codeunits to codepoints.
112 */ 114 */
113 List<int> _utf16CodeUnitsToCodepoints( 115 List<int> _utf16CodeUnitsToCodepoints(
114 List<int> utf16CodeUnits, [int offset = 0, int length, 116 List<int> utf16CodeUnits, [int offset = 0, int length,
115 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 117 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
116 _ListRangeIterator source = 118 _ListRangeIterator source =
117 (new _ListRange(utf16CodeUnits, offset, length)).iterator(); 119 (new _ListRange(utf16CodeUnits, offset, length)).iterator;
118 Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder 120 Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder
119 .fromListRangeIterator(source, replacementCodepoint); 121 .fromListRangeIterator(source, replacementCodepoint);
120 List<int> codepoints = new List<int>(source.remaining); 122 List<int> codepoints = new List<int>(source.remaining);
121 int i = 0; 123 int i = 0;
122 while (decoder.hasNext) { 124 while (decoder.moveNext()) {
123 codepoints[i++] = decoder.next(); 125 codepoints[i++] = decoder.current;
124 } 126 }
125 if (i == codepoints.length) { 127 if (i == codepoints.length) {
126 return codepoints; 128 return codepoints;
127 } else { 129 } else {
128 List<int> codepointTrunc = new List<int>(i); 130 List<int> codepointTrunc = new List<int>(i);
129 codepointTrunc.setRange(0, i, codepoints); 131 codepointTrunc.setRange(0, i, codepoints);
130 return codepointTrunc; 132 return codepointTrunc;
131 } 133 }
132 } 134 }
133 135
134 /** 136 /**
135 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units. 137 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units.
136 * The parameters can override the default Unicode replacement character. Set 138 * The parameters can override the default Unicode replacement character. Set
137 * the replacementCodepoint to null to throw an ArgumentError 139 * the replacementCodepoint to null to throw an ArgumentError
138 * rather than replace the bad value. 140 * rather than replace the bad value.
139 */ 141 */
140 class Utf16CodeUnitDecoder implements Iterator<int> { 142 class Utf16CodeUnitDecoder implements Iterator<int> {
141 final _ListRangeIterator utf16CodeUnitIterator; 143 final _ListRangeIterator utf16CodeUnitIterator;
142 final int replacementCodepoint; 144 final int replacementCodepoint;
145 int _current = -1;
143 146
144 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length, 147 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
145 int this.replacementCodepoint = 148 int this.replacementCodepoint =
146 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 149 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
147 utf16CodeUnitIterator = (new _ListRange(utf16CodeUnits, offset, length)) 150 utf16CodeUnitIterator =
148 .iterator(); 151 (new _ListRange(utf16CodeUnits, offset, length)).iterator;
149 152
150 Utf16CodeUnitDecoder.fromListRangeIterator( 153 Utf16CodeUnitDecoder.fromListRangeIterator(
151 _ListRangeIterator this.utf16CodeUnitIterator, 154 _ListRangeIterator this.utf16CodeUnitIterator,
152 int this.replacementCodepoint); 155 int this.replacementCodepoint);
153 156
154 Iterator<int> iterator() => this; 157 Iterator<int> get iterator => this;
155 158
156 bool get hasNext => utf16CodeUnitIterator.hasNext; 159 int get current {
160 if (_current == -1) {
161 // TODO(floitsch): bad error message.
162 throw new StateError("No more elements");
163 }
164 return _current;
165 }
157 166
158 int next() { 167 bool moveNext() {
159 int value = utf16CodeUnitIterator.next(); 168 _current = -1;
169 if (!utf16CodeUnitIterator.moveNext()) return false;
170
171 int value = utf16CodeUnitIterator.current;
160 if (value < 0) { 172 if (value < 0) {
161 if (replacementCodepoint != null) { 173 if (replacementCodepoint != null) {
162 return replacementCodepoint; 174 _current = replacementCodepoint;
163 } else { 175 } else {
164 throw new ArgumentError( 176 throw new ArgumentError(
165 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); 177 "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
166 } 178 }
167 } else if (value < UNICODE_UTF16_RESERVED_LO || 179 } else if (value < UNICODE_UTF16_RESERVED_LO ||
168 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { 180 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
169 // transfer directly 181 // transfer directly
170 return value; 182 _current = value;
171 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE && 183 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
172 utf16CodeUnitIterator.hasNext) { 184 utf16CodeUnitIterator.moveNext()) {
173 // merge surrogate pair 185 // merge surrogate pair
174 int nextValue = utf16CodeUnitIterator.next(); 186 int nextValue = utf16CodeUnitIterator.current;
175 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE && 187 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
176 nextValue <= UNICODE_UTF16_RESERVED_HI) { 188 nextValue <= UNICODE_UTF16_RESERVED_HI) {
177 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10; 189 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10;
178 value += UNICODE_UTF16_OFFSET + 190 value += UNICODE_UTF16_OFFSET +
179 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE); 191 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE);
180 return value; 192 _current = value;
181 } else { 193 } else {
182 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE && 194 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE &&
183 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) { 195 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) {
184 utf16CodeUnitIterator.backup(); 196 utf16CodeUnitIterator.backup();
185 } 197 }
186 if (replacementCodepoint != null) { 198 if (replacementCodepoint != null) {
187 return replacementCodepoint; 199 _current = replacementCodepoint;
188 } else { 200 } else {
189 throw new ArgumentError( 201 throw new ArgumentError(
190 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); 202 "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
191 } 203 }
192 } 204 }
193 } else if (replacementCodepoint != null) { 205 } else if (replacementCodepoint != null) {
194 return replacementCodepoint; 206 _current = replacementCodepoint;
195 } else { 207 } else {
196 throw new ArgumentError( 208 throw new ArgumentError(
197 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); 209 "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
198 } 210 }
211 return true;
199 } 212 }
200 } 213 }
201 214
202 /** 215 /**
203 * _ListRange is an internal type used to create a lightweight Iterable on a 216 * _ListRange is an internal type used to create a lightweight Iterable on a
204 * range within a source list. DO NOT MODIFY the underlying list while 217 * range within a source list. DO NOT MODIFY the underlying list while
205 * iterating over it. The results of doing so are undefined. 218 * iterating over it. The results of doing so are undefined.
206 */ 219 */
207 class _ListRange extends Iterable { 220 class _ListRange extends Iterable {
208 final List _source; 221 final List _source;
209 final int _offset; 222 final int _offset;
210 final int _length; 223 final int _length;
211 224
212 _ListRange(source, [offset = 0, length]) : 225 _ListRange(source, [offset = 0, length]) :
213 this._source = source, 226 this._source = source,
214 this._offset = offset, 227 this._offset = offset,
215 this._length = (length == null ? source.length - offset : length) { 228 this._length = (length == null ? source.length - offset : length) {
216 if (_offset < 0 || _offset > _source.length) { 229 if (_offset < 0 || _offset > _source.length) {
217 throw new RangeError.value(_offset); 230 throw new RangeError.value(_offset);
218 } 231 }
219 if (_length != null && (_length < 0)) { 232 if (_length != null && (_length < 0)) {
220 throw new RangeError.value(_length); 233 throw new RangeError.value(_length);
221 } 234 }
222 if (_length + _offset > _source.length) { 235 if (_length + _offset > _source.length) {
223 throw new RangeError.value(_length + _offset); 236 throw new RangeError.value(_length + _offset);
224 } 237 }
225 } 238 }
226 239
227 _ListRangeIterator iterator() => 240 _ListRangeIterator get iterator =>
228 new _ListRangeIteratorImpl(_source, _offset, _offset + _length); 241 new _ListRangeIteratorImpl(_source, _offset, _offset + _length);
229 242
230 int get length => _length; 243 int get length => _length;
231 } 244 }
232 245
233 /** 246 /**
234 * The _ListRangeIterator provides more capabilities than a standard iterator, 247 * The _ListRangeIterator provides more capabilities than a standard iterator,
235 * including the ability to get the current position, count remaining items, 248 * including the ability to get the current position, count remaining items,
236 * and move forward/backward within the iterator. 249 * and move forward/backward within the iterator.
237 */ 250 */
238 abstract class _ListRangeIterator implements Iterator<int> { 251 abstract class _ListRangeIterator implements Iterator<int> {
239 bool hasNext; 252 bool moveNext();
240 int next(); 253 int get current;
241 int get position; 254 int get position;
242 void backup([by]); 255 void backup([by]);
243 int get remaining; 256 int get remaining;
244 void skip([count]); 257 void skip([count]);
245 } 258 }
246 259
247 class _ListRangeIteratorImpl implements _ListRangeIterator { 260 class _ListRangeIteratorImpl implements _ListRangeIterator {
248 final List<int> _source; 261 final List<int> _source;
249 int _offset; 262 int _offset;
250 final int _end; 263 final int _end;
251 264
252 _ListRangeIteratorImpl(this._source, this._offset, this._end); 265 _ListRangeIteratorImpl(this._source, int offset, this._end)
266 : _offset = offset - 1;
253 267
254 bool get hasNext => _offset < _end; 268 int get current => _source[_offset];
255 269
256 int next() => _source[_offset++]; 270 bool moveNext() => ++_offset < _end;
257 271
258 int get position => _offset; 272 int get position => _offset;
259 273
260 void backup([int by = 1]) { 274 void backup([int by = 1]) {
261 _offset -= by; 275 _offset -= by;
262 } 276 }
263 277
264 int get remaining => _end - _offset; 278 int get remaining => _end - _offset - 1;
265 279
266 void skip([int count = 1]) { 280 void skip([int count = 1]) {
267 _offset += count; 281 _offset += count;
268 } 282 }
269 } 283 }
270 284
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698