sdk/lib/utf/utf16.dart - Issue 11783009: Big merge from experimental to bleeding edge.

Side by Side Diff: sdk/lib/utf/utf16.dart

Issue 11783009: Big merge from experimental to bleeding edge. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 part of dart.utf;	5 part of dart.utf;

6	6

7 /**	7 /**

8 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert	8 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert

9 * as much of the input as needed. Determines the byte order from the BOM,	9 * as much of the input as needed. Determines the byte order from the BOM,

10 * or uses big-endian as a default. This method always strips a leading BOM.	10 * or uses big-endian as a default. This method always strips a leading BOM.

(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 List<int> encodeUtf16(String str) =>	106 List<int> encodeUtf16(String str) =>

107 encodeUtf16be(str, true);	107 encodeUtf16be(str, true);

108	108

109 /**	109 /**

110 * Produce a list of UTF-16BE encoded bytes. By default, this method produces	110 * Produce a list of UTF-16BE encoded bytes. By default, this method produces

111 * UTF-16BE bytes with no BOM.	111 * UTF-16BE bytes with no BOM.

112 */	112 */

113 List<int> encodeUtf16be(String str, [bool writeBOM = false]) {	113 List<int> encodeUtf16be(String str, [bool writeBOM = false]) {

114 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str);	114 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str);

115 List<int> encoding =	115 List<int> encoding =

116 new List<int>(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0));	116 new List<int>.fixedLength(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0));

117 int i = 0;	117 int i = 0;

118 if (writeBOM) {	118 if (writeBOM) {

119 encoding[i++] = UNICODE_UTF_BOM_HI;	119 encoding[i++] = UNICODE_UTF_BOM_HI;

120 encoding[i++] = UNICODE_UTF_BOM_LO;	120 encoding[i++] = UNICODE_UTF_BOM_LO;

121 }	121 }

122 for (int unit in utf16CodeUnits) {	122 for (int unit in utf16CodeUnits) {

123 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8;	123 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8;

124 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK;	124 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK;

125 }	125 }

126 return encoding;	126 return encoding;

127 }	127 }

128	128

129 /**	129 /**

130 * Produce a list of UTF-16LE encoded bytes. By default, this method produces	130 * Produce a list of UTF-16LE encoded bytes. By default, this method produces

131 * UTF-16LE bytes with no BOM.	131 * UTF-16LE bytes with no BOM.

132 */	132 */

133 List<int> encodeUtf16le(String str, [bool writeBOM = false]) {	133 List<int> encodeUtf16le(String str, [bool writeBOM = false]) {

134 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str);	134 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str);

135 List<int> encoding =	135 List<int> encoding =

136 new List<int>(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0));	136 new List<int>.fixedLength(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0));

137 int i = 0;	137 int i = 0;

138 if (writeBOM) {	138 if (writeBOM) {

139 encoding[i++] = UNICODE_UTF_BOM_LO;	139 encoding[i++] = UNICODE_UTF_BOM_LO;

140 encoding[i++] = UNICODE_UTF_BOM_HI;	140 encoding[i++] = UNICODE_UTF_BOM_HI;

141 }	141 }

142 for (int unit in utf16CodeUnits) {	142 for (int unit in utf16CodeUnits) {

143 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK;	143 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK;

144 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8;	144 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8;

145 }	145 }

146 return encoding;	146 return encoding;

(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
181 return _codepointsToUtf16CodeUnits(str.charCodes);	181 return _codepointsToUtf16CodeUnits(str.charCodes);

182 }	182 }

183	183

184 typedef _ListRangeIterator _CodeUnitsProvider();	184 typedef _ListRangeIterator _CodeUnitsProvider();

185	185

186 /**	186 /**

187 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type	187 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type

188 * provides an iterator on demand and the iterator will only translate bytes	188 * provides an iterator on demand and the iterator will only translate bytes

189 * as requested by the user of the iterator. (Note: results are not cached.)	189 * as requested by the user of the iterator. (Note: results are not cached.)

190 */	190 */

191 class IterableUtf16Decoder implements Iterable<int> {	191 // TODO(floitsch): Consider removing the extend and switch to implements since

	192 // that's cheaper to allocate.

	193 class IterableUtf16Decoder extends Iterable<int> {

192 final _CodeUnitsProvider codeunitsProvider;	194 final _CodeUnitsProvider codeunitsProvider;

193 final int replacementCodepoint;	195 final int replacementCodepoint;

194	196

195 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);	197 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);

196	198

197 Utf16CodeUnitDecoder iterator() =>	199 Utf16CodeUnitDecoder get iterator =>

198 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(),	200 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(),

199 replacementCodepoint);	201 replacementCodepoint);

200 }	202 }

201	203

202 /**	204 /**

203 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes	205 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes

204 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine	206 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine

205 * endian-ness, and defaults to BE.	207 * endian-ness, and defaults to BE.

206 */	208 */

207 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator {	209 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator {

208 final _ListRangeIterator utf16EncodedBytesIterator;	210 final _ListRangeIterator utf16EncodedBytesIterator;

209 final int replacementCodepoint;	211 final int replacementCodepoint;

	212 int _current = null;

210	213

211 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(	214 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(

212 this.utf16EncodedBytesIterator, this.replacementCodepoint);	215 this.utf16EncodedBytesIterator, this.replacementCodepoint);

213	216

214 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [	217 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [

215 int offset = 0, int length,	218 int offset = 0, int length,

216 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	219 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

217 if (length == null) {	220 if (length == null) {

218 length = utf16EncodedBytes.length - offset;	221 length = utf16EncodedBytes.length - offset;

219 }	222 }

220 if (hasUtf16beBom(utf16EncodedBytes, offset, length)) {	223 if (hasUtf16beBom(utf16EncodedBytes, offset, length)) {

221 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,	224 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,

222 length - 2, false, replacementCodepoint);	225 length - 2, false, replacementCodepoint);

223 } else if (hasUtf16leBom(utf16EncodedBytes, offset, length)) {	226 } else if (hasUtf16leBom(utf16EncodedBytes, offset, length)) {

224 return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,	227 return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,

225 length - 2, false, replacementCodepoint);	228 length - 2, false, replacementCodepoint);

226 } else {	229 } else {

227 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset,	230 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset,

228 length, false, replacementCodepoint);	231 length, false, replacementCodepoint);

229 }	232 }

230 }	233 }

231	234

232 /**	235 /**

233 * Provides a fast way to decode the rest of the source bytes in a single	236 * Provides a fast way to decode the rest of the source bytes in a single

234 * call. This method trades memory for improved speed in that it potentially	237 * call. This method trades memory for improved speed in that it potentially

235 * over-allocates the List containing results.	238 * over-allocates the List containing results.

236 */	239 */

237 List<int> decodeRest() {	240 List<int> decodeRest() {

238 List<int> codeunits = new List<int>(remaining);	241 List<int> codeunits = new List<int>.fixedLength(remaining);

239 int i = 0;	242 int i = 0;

240 while (hasNext) {	243 while (moveNext()) {

241 codeunits[i++] = next();	244 codeunits[i++] = current;

242 }	245 }

243 if (i == codeunits.length) {	246 if (i == codeunits.length) {

244 return codeunits;	247 return codeunits;

245 } else {	248 } else {

246 List<int> truncCodeunits = new List<int>(i);	249 List<int> truncCodeunits = new List<int>.fixedLength(i);

247 truncCodeunits.setRange(0, i, codeunits);	250 truncCodeunits.setRange(0, i, codeunits);

248 return truncCodeunits;	251 return truncCodeunits;

249 }	252 }

250 }	253 }

251	254

252 bool get hasNext => utf16EncodedBytesIterator.hasNext;	255 int get current => _current;

253	256

254 int next() {	257 bool moveNext() {

	258 _current = null;

255 if (utf16EncodedBytesIterator.remaining < 2) {	259 if (utf16EncodedBytesIterator.remaining < 2) {

256 utf16EncodedBytesIterator.next();	260 utf16EncodedBytesIterator.moveNext();

257 if (replacementCodepoint != null) {	261 if (replacementCodepoint != null) {

258 return replacementCodepoint;	262 _current = replacementCodepoint;

	263 return true;

259 } else {	264 } else {

260 throw new ArgumentError(	265 throw new ArgumentError(

261 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}");	266 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}");

262 }	267 }

263 } else {	268 } else {

264 return decode();	269 _current = decode();

	270 return true;

265 }	271 }

266 }	272 }

267	273

268 int get position => utf16EncodedBytesIterator.position ~/ 2;	274 int get position => utf16EncodedBytesIterator.position ~/ 2;

269	275

270 void backup([int by = 1]) {	276 void backup([int by = 1]) {

271 utf16EncodedBytesIterator.backup(2 * by);	277 utf16EncodedBytesIterator.backup(2 * by);

272 }	278 }

273	279

274 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2;	280 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2;

275	281

276 void skip([int count = 1]) {	282 void skip([int count = 1]) {

277 utf16EncodedBytesIterator.skip(2 * count);	283 utf16EncodedBytesIterator.skip(2 * count);

278 }	284 }

279	285

280 int decode();	286 int decode();

281 }	287 }

282	288

283 /**	289 /**

284 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes	290 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes

285 * to produce the code unit (0-(2^16)-1).	291 * to produce the code unit (0-(2^16)-1).

286 */	292 */

287 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {	293 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {

288 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [	294 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [

289 int offset = 0, int length, bool stripBom = true,	295 int offset = 0, int length, bool stripBom = true,

290 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	296 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :

291 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset,	297 super._fromListRangeIterator(

292 length)).iterator(), replacementCodepoint) {	298 (new _ListRange(utf16EncodedBytes, offset, length)).iterator,

	299 replacementCodepoint) {

293 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) {	300 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) {

294 skip();	301 skip();

295 }	302 }

296 }	303 }

297	304

298 int decode() {	305 int decode() {

299 int hi = utf16EncodedBytesIterator.next();	306 utf16EncodedBytesIterator.moveNext();

300 int lo = utf16EncodedBytesIterator.next();	307 int hi = utf16EncodedBytesIterator.current;

	308 utf16EncodedBytesIterator.moveNext();

	309 int lo = utf16EncodedBytesIterator.current;

301 return (hi << 8) + lo;	310 return (hi << 8) + lo;

302 }	311 }

303 }	312 }

304	313

305 /**	314 /**

306 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes	315 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes

307 * to produce the code unit (0-(2^16)-1).	316 * to produce the code unit (0-(2^16)-1).

308 */	317 */

309 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {	318 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {

310 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [	319 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [

311 int offset = 0, int length, bool stripBom = true,	320 int offset = 0, int length, bool stripBom = true,

312 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :	321 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :

313 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset,	322 super._fromListRangeIterator(

314 length)).iterator(), replacementCodepoint) {	323 (new _ListRange(utf16EncodedBytes, offset, length)).iterator,

	324 replacementCodepoint) {

315 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) {	325 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) {

316 skip();	326 skip();

317 }	327 }

318 }	328 }

319	329

320 int decode() {	330 int decode() {

321 int lo = utf16EncodedBytesIterator.next();	331 utf16EncodedBytesIterator.moveNext();

322 int hi = utf16EncodedBytesIterator.next();	332 int lo = utf16EncodedBytesIterator.current;

	333 utf16EncodedBytesIterator.moveNext();

	334 int hi = utf16EncodedBytesIterator.current;

323 return (hi << 8) + lo;	335 return (hi << 8) + lo;

324 }	336 }

325 }	337 }

OLD	NEW

« no previous file with comments | « sdk/lib/uri/uri.dart ('k') | sdk/lib/utf/utf32.dart » ('j') | no next file with comments »