Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(231)

Side by Side Diff: sdk/lib/utf/utf_core.dart

Issue 11783009: Big merge from experimental to bleeding edge. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « sdk/lib/utf/utf8.dart ('k') | tests/benchmark_smoke/benchmark_lib.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.utf; 5 part of dart.utf;
6 6
7 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501). 7 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501).
8 /** 8 /**
9 * Provide a list of Unicode codepoints for a given string. 9 * Provide a list of Unicode codepoints for a given string.
10 */ 10 */
(...skipping 27 matching lines...) Expand all
38 const int UNICODE_UTF16_OFFSET = 0x10000; 38 const int UNICODE_UTF16_OFFSET = 0x10000;
39 const int UNICODE_UTF16_SURROGATE_UNIT_0_BASE = 0xd800; 39 const int UNICODE_UTF16_SURROGATE_UNIT_0_BASE = 0xd800;
40 const int UNICODE_UTF16_SURROGATE_UNIT_1_BASE = 0xdc00; 40 const int UNICODE_UTF16_SURROGATE_UNIT_1_BASE = 0xdc00;
41 const int UNICODE_UTF16_HI_MASK = 0xffc00; 41 const int UNICODE_UTF16_HI_MASK = 0xffc00;
42 const int UNICODE_UTF16_LO_MASK = 0x3ff; 42 const int UNICODE_UTF16_LO_MASK = 0x3ff;
43 43
44 /** 44 /**
45 * Encode code points as UTF16 code units. 45 * Encode code points as UTF16 code units.
46 */ 46 */
47 List<int> _codepointsToUtf16CodeUnits( 47 List<int> _codepointsToUtf16CodeUnits(
48 List<int> codepoints, [int offset = 0, int length, 48 List<int> codepoints,
49 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 49 [int offset = 0,
50 int length,
51 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
50 52
51 _ListRange listRange = new _ListRange(codepoints, offset, length); 53 _ListRange listRange = new _ListRange(codepoints, offset, length);
52 int encodedLength = 0; 54 int encodedLength = 0;
53 for (int value in listRange) { 55 for (int value in listRange) {
54 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) || 56 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
55 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { 57 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
56 encodedLength++; 58 encodedLength++;
57 } else if (value > UNICODE_PLANE_ONE_MAX && 59 } else if (value > UNICODE_PLANE_ONE_MAX &&
58 value <= UNICODE_VALID_RANGE_MAX) { 60 value <= UNICODE_VALID_RANGE_MAX) {
59 encodedLength += 2; 61 encodedLength += 2;
60 } else { 62 } else {
61 encodedLength++; 63 encodedLength++;
62 } 64 }
63 } 65 }
64 66
65 List<int> codeUnitsBuffer = new List<int>(encodedLength); 67 List<int> codeUnitsBuffer = new List<int>.fixedLength(encodedLength);
66 int j = 0; 68 int j = 0;
67 for (int value in listRange) { 69 for (int value in listRange) {
68 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) || 70 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
69 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { 71 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
70 codeUnitsBuffer[j++] = value; 72 codeUnitsBuffer[j++] = value;
71 } else if (value > UNICODE_PLANE_ONE_MAX && 73 } else if (value > UNICODE_PLANE_ONE_MAX &&
72 value <= UNICODE_VALID_RANGE_MAX) { 74 value <= UNICODE_VALID_RANGE_MAX) {
73 int base = value - UNICODE_UTF16_OFFSET; 75 int base = value - UNICODE_UTF16_OFFSET;
74 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE + 76 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE +
75 ((base & UNICODE_UTF16_HI_MASK) >> 10); 77 ((base & UNICODE_UTF16_HI_MASK) >> 10);
76 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE + 78 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE +
77 (base & UNICODE_UTF16_LO_MASK); 79 (base & UNICODE_UTF16_LO_MASK);
78 } else if (replacementCodepoint != null) { 80 } else if (replacementCodepoint != null) {
79 codeUnitsBuffer[j++] = replacementCodepoint; 81 codeUnitsBuffer[j++] = replacementCodepoint;
80 } else { 82 } else {
81 throw new ArgumentError("Invalid encoding"); 83 throw new ArgumentError("Invalid encoding");
82 } 84 }
83 } 85 }
84 return codeUnitsBuffer; 86 return codeUnitsBuffer;
85 } 87 }
86 88
87 /** 89 /**
88 * Decodes the utf16 codeunits to codepoints. 90 * Decodes the utf16 codeunits to codepoints.
89 */ 91 */
90 List<int> _utf16CodeUnitsToCodepoints( 92 List<int> _utf16CodeUnitsToCodepoints(
91 List<int> utf16CodeUnits, [int offset = 0, int length, 93 List<int> utf16CodeUnits, [int offset = 0, int length,
92 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 94 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
93 _ListRangeIterator source = 95 _ListRangeIterator source =
94 (new _ListRange(utf16CodeUnits, offset, length)).iterator(); 96 (new _ListRange(utf16CodeUnits, offset, length)).iterator;
95 Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder 97 Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder
96 .fromListRangeIterator(source, replacementCodepoint); 98 .fromListRangeIterator(source, replacementCodepoint);
97 List<int> codepoints = new List<int>(source.remaining); 99 List<int> codepoints = new List<int>.fixedLength(source.remaining);
98 int i = 0; 100 int i = 0;
99 while (decoder.hasNext) { 101 while (decoder.moveNext()) {
100 codepoints[i++] = decoder.next(); 102 codepoints[i++] = decoder.current;
101 } 103 }
102 if (i == codepoints.length) { 104 if (i == codepoints.length) {
103 return codepoints; 105 return codepoints;
104 } else { 106 } else {
105 List<int> codepointTrunc = new List<int>(i); 107 List<int> codepointTrunc = new List<int>.fixedLength(i);
106 codepointTrunc.setRange(0, i, codepoints); 108 codepointTrunc.setRange(0, i, codepoints);
107 return codepointTrunc; 109 return codepointTrunc;
108 } 110 }
109 } 111 }
110 112
111 /** 113 /**
112 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units. 114 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units.
113 * The parameters can override the default Unicode replacement character. Set 115 * The parameters can override the default Unicode replacement character. Set
114 * the replacementCodepoint to null to throw an ArgumentError 116 * the replacementCodepoint to null to throw an ArgumentError
115 * rather than replace the bad value. 117 * rather than replace the bad value.
116 */ 118 */
117 class Utf16CodeUnitDecoder implements Iterator<int> { 119 class Utf16CodeUnitDecoder implements Iterator<int> {
118 final _ListRangeIterator utf16CodeUnitIterator; 120 final _ListRangeIterator utf16CodeUnitIterator;
119 final int replacementCodepoint; 121 final int replacementCodepoint;
122 int _current = null;
120 123
121 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length, 124 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
122 int this.replacementCodepoint = 125 int this.replacementCodepoint =
123 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 126 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
124 utf16CodeUnitIterator = (new _ListRange(utf16CodeUnits, offset, length)) 127 utf16CodeUnitIterator =
125 .iterator(); 128 (new _ListRange(utf16CodeUnits, offset, length)).iterator;
126 129
127 Utf16CodeUnitDecoder.fromListRangeIterator( 130 Utf16CodeUnitDecoder.fromListRangeIterator(
128 _ListRangeIterator this.utf16CodeUnitIterator, 131 _ListRangeIterator this.utf16CodeUnitIterator,
129 int this.replacementCodepoint); 132 int this.replacementCodepoint);
130 133
131 Iterator<int> iterator() => this; 134 Iterator<int> get iterator => this;
132 135
133 bool get hasNext => utf16CodeUnitIterator.hasNext; 136 int get current => _current;
134 137
135 int next() { 138 bool moveNext() {
136 int value = utf16CodeUnitIterator.next(); 139 _current = null;
140 if (!utf16CodeUnitIterator.moveNext()) return false;
141
142 int value = utf16CodeUnitIterator.current;
137 if (value < 0) { 143 if (value < 0) {
138 if (replacementCodepoint != null) { 144 if (replacementCodepoint != null) {
139 return replacementCodepoint; 145 _current = replacementCodepoint;
140 } else { 146 } else {
141 throw new ArgumentError( 147 throw new ArgumentError(
142 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); 148 "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
143 } 149 }
144 } else if (value < UNICODE_UTF16_RESERVED_LO || 150 } else if (value < UNICODE_UTF16_RESERVED_LO ||
145 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { 151 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
146 // transfer directly 152 // transfer directly
147 return value; 153 _current = value;
148 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE && 154 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
149 utf16CodeUnitIterator.hasNext) { 155 utf16CodeUnitIterator.moveNext()) {
150 // merge surrogate pair 156 // merge surrogate pair
151 int nextValue = utf16CodeUnitIterator.next(); 157 int nextValue = utf16CodeUnitIterator.current;
152 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE && 158 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
153 nextValue <= UNICODE_UTF16_RESERVED_HI) { 159 nextValue <= UNICODE_UTF16_RESERVED_HI) {
154 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10; 160 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10;
155 value += UNICODE_UTF16_OFFSET + 161 value += UNICODE_UTF16_OFFSET +
156 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE); 162 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE);
157 return value; 163 _current = value;
158 } else { 164 } else {
159 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE && 165 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE &&
160 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) { 166 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) {
161 utf16CodeUnitIterator.backup(); 167 utf16CodeUnitIterator.backup();
162 } 168 }
163 if (replacementCodepoint != null) { 169 if (replacementCodepoint != null) {
164 return replacementCodepoint; 170 _current = replacementCodepoint;
165 } else { 171 } else {
166 throw new ArgumentError( 172 throw new ArgumentError(
167 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); 173 "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
168 } 174 }
169 } 175 }
170 } else if (replacementCodepoint != null) { 176 } else if (replacementCodepoint != null) {
171 return replacementCodepoint; 177 _current = replacementCodepoint;
172 } else { 178 } else {
173 throw new ArgumentError( 179 throw new ArgumentError(
174 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); 180 "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
175 } 181 }
182 return true;
176 } 183 }
177 } 184 }
178 185
179 /** 186 /**
180 * _ListRange is an internal type used to create a lightweight Iterable on a 187 * _ListRange is an internal type used to create a lightweight Iterable on a
181 * range within a source list. DO NOT MODIFY the underlying list while 188 * range within a source list. DO NOT MODIFY the underlying list while
182 * iterating over it. The results of doing so are undefined. 189 * iterating over it. The results of doing so are undefined.
183 */ 190 */
184 class _ListRange implements Iterable { 191 // TODO(floitsch): Consider removing the extend and switch to implements since
192 // that's cheaper to allocate.
193 class _ListRange extends Iterable {
185 final List _source; 194 final List _source;
186 final int _offset; 195 final int _offset;
187 final int _length; 196 final int _length;
188 197
189 _ListRange(source, [offset = 0, length]) : 198 _ListRange(source, [offset = 0, length]) :
190 this._source = source, 199 this._source = source,
191 this._offset = offset, 200 this._offset = offset,
192 this._length = (length == null ? source.length - offset : length) { 201 this._length = (length == null ? source.length - offset : length) {
193 if (_offset < 0 || _offset > _source.length) { 202 if (_offset < 0 || _offset > _source.length) {
194 throw new RangeError.value(_offset); 203 throw new RangeError.value(_offset);
195 } 204 }
196 if (_length != null && (_length < 0)) { 205 if (_length != null && (_length < 0)) {
197 throw new RangeError.value(_length); 206 throw new RangeError.value(_length);
198 } 207 }
199 if (_length + _offset > _source.length) { 208 if (_length + _offset > _source.length) {
200 throw new RangeError.value(_length + _offset); 209 throw new RangeError.value(_length + _offset);
201 } 210 }
202 } 211 }
203 212
204 _ListRangeIterator iterator() => 213 _ListRangeIterator get iterator =>
205 new _ListRangeIteratorImpl(_source, _offset, _offset + _length); 214 new _ListRangeIteratorImpl(_source, _offset, _offset + _length);
206 215
207 int get length => _length; 216 int get length => _length;
208 } 217 }
209 218
210 /** 219 /**
211 * The _ListRangeIterator provides more capabilities than a standard iterator, 220 * The _ListRangeIterator provides more capabilities than a standard iterator,
212 * including the ability to get the current position, count remaining items, 221 * including the ability to get the current position, count remaining items,
213 * and move forward/backward within the iterator. 222 * and move forward/backward within the iterator.
214 */ 223 */
215 abstract class _ListRangeIterator implements Iterator<int> { 224 abstract class _ListRangeIterator implements Iterator<int> {
216 bool hasNext; 225 bool moveNext();
217 int next(); 226 int get current;
218 int get position; 227 int get position;
219 void backup([by]); 228 void backup([by]);
220 int get remaining; 229 int get remaining;
221 void skip([count]); 230 void skip([count]);
222 } 231 }
223 232
224 class _ListRangeIteratorImpl implements _ListRangeIterator { 233 class _ListRangeIteratorImpl implements _ListRangeIterator {
225 final List<int> _source; 234 final List<int> _source;
226 int _offset; 235 int _offset;
227 final int _end; 236 final int _end;
228 237
229 _ListRangeIteratorImpl(this._source, this._offset, this._end); 238 _ListRangeIteratorImpl(this._source, int offset, this._end)
239 : _offset = offset - 1;
230 240
231 bool get hasNext => _offset < _end; 241 int get current => _source[_offset];
232 242
233 int next() => _source[_offset++]; 243 bool moveNext() => ++_offset < _end;
234 244
235 int get position => _offset; 245 int get position => _offset;
236 246
237 void backup([int by = 1]) { 247 void backup([int by = 1]) {
238 _offset -= by; 248 _offset -= by;
239 } 249 }
240 250
241 int get remaining => _end - _offset; 251 int get remaining => _end - _offset - 1;
242 252
243 void skip([int count = 1]) { 253 void skip([int count = 1]) {
244 _offset += count; 254 _offset += count;
245 } 255 }
246 } 256 }
247 257
OLDNEW
« no previous file with comments | « sdk/lib/utf/utf8.dart ('k') | tests/benchmark_smoke/benchmark_lib.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698