mojo/public/dart/third_party/utf/lib/src/utf/utf32.dart - Issue 1346773002: Stop running pub get at gclient sync time and fix build bugs

Side by Side Diff: mojo/public/dart/third_party/utf/lib/src/utf/utf32.dart

Issue 1346773002: Stop running pub get at gclient sync time and fix build bugs (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 part of utf;

	6

	/**
	 * Lazily decodes UTF-32 [bytes] into codepoints.
	 *
	 * The returned iterable builds a new [Utf32BytesDecoder] each time its
	 * iterator is requested, so only as much of the input as the consumer
	 * actually iterates over gets decoded. The byte order is taken from a
	 * leading BOM, falling back to big-endian when there is none; a leading
	 * BOM is always stripped. [offset] and [length] select the range of
	 * [bytes] to decode.
	 *
	 * Set [replacementCodepoint] to null to throw an ArgumentError
	 * rather than replace a bad value.
	 */
	IterableUtf32Decoder decodeUtf32AsIterable(List<int> bytes, [
	    int offset = 0, int length,
	    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
	  // A fresh decoder is created per iteration; nothing is cached.
	  Utf32BytesDecoder makeDecoder() =>
	      new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint);
	  return new IterableUtf32Decoder._(makeDecoder);
	}

	20

	/**
	 * Lazily decodes UTF-32BE [bytes] into codepoints.
	 *
	 * The returned iterable builds a new [Utf32beBytesDecoder] each time its
	 * iterator is requested, so only as much of the input as the consumer
	 * actually iterates over gets decoded. A leading big-endian BOM is
	 * stripped unless [stripBom] is set to false. [offset] and [length]
	 * select the range of [bytes] to decode.
	 *
	 * Set [replacementCodepoint] to null to throw an ArgumentError
	 * rather than replace a bad value.
	 */
	IterableUtf32Decoder decodeUtf32beAsIterable(List<int> bytes, [
	    int offset = 0, int length, bool stripBom = true,
	    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
	  // A fresh decoder is created per iteration; nothing is cached.
	  Utf32BytesDecoder makeDecoder() => new Utf32beBytesDecoder(
	      bytes, offset, length, stripBom, replacementCodepoint);
	  return new IterableUtf32Decoder._(makeDecoder);
	}

	35

	/**
	 * Lazily decodes UTF-32LE [bytes] into codepoints.
	 *
	 * The returned iterable builds a new [Utf32leBytesDecoder] each time its
	 * iterator is requested, so only as much of the input as the consumer
	 * actually iterates over gets decoded. A leading little-endian BOM is
	 * stripped unless [stripBom] is set to false. [offset] and [length]
	 * select the range of [bytes] to decode.
	 *
	 * Set [replacementCodepoint] to null to throw an ArgumentError
	 * rather than replace a bad value.
	 */
	IterableUtf32Decoder decodeUtf32leAsIterable(List<int> bytes, [
	    int offset = 0, int length, bool stripBom = true,
	    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
	  // A fresh decoder is created per iteration; nothing is cached.
	  Utf32BytesDecoder makeDecoder() => new Utf32leBytesDecoder(
	      bytes, offset, length, stripBom, replacementCodepoint);
	  return new IterableUtf32Decoder._(makeDecoder);
	}

	50

	/**
	 * Produces a String from a sequence of UTF-32 encoded bytes.
	 *
	 * [offset] and [length] restrict decoding to a range of [bytes]. The byte
	 * order is taken from a leading BOM (which is stripped), defaulting to
	 * big-endian when no BOM is present.
	 *
	 * [replacementCodepoint] overrides the default Unicode replacement
	 * character; set it to null to throw an ArgumentError rather than
	 * replace a bad value.
	 */
	String decodeUtf32(List<int> bytes, [int offset = 0, int length,
	    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
	  Utf32BytesDecoder decoder =
	      new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint);
	  return new String.fromCharCodes(decoder.decodeRest());
	}

	/**
	 * Produces a String from a sequence of UTF-32BE encoded bytes.
	 *
	 * [offset] and [length] restrict decoding to a range of [bytes]. A
	 * leading big-endian BOM is stripped unless [stripBom] is false.
	 *
	 * [replacementCodepoint] overrides the default Unicode replacement
	 * character; set it to null to throw an ArgumentError rather than
	 * replace a bad value.
	 */
	String decodeUtf32be(
	    List<int> bytes, [int offset = 0, int length, bool stripBom = true,
	    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
	  Utf32BytesDecoder decoder = new Utf32beBytesDecoder(
	      bytes, offset, length, stripBom, replacementCodepoint);
	  return new String.fromCharCodes(decoder.decodeRest());
	}

	75

	/**
	 * Produces a String from a sequence of UTF-32LE encoded bytes.
	 *
	 * [offset] and [length] restrict decoding to a range of [bytes]. A
	 * leading little-endian BOM is stripped unless [stripBom] is false.
	 *
	 * [replacementCodepoint] overrides the default Unicode replacement
	 * character; set it to null to throw an ArgumentError rather than
	 * replace a bad value.
	 */
	String decodeUtf32le(
	    List<int> bytes, [int offset = 0, int length, bool stripBom = true,
	    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
	  Utf32BytesDecoder decoder = new Utf32leBytesDecoder(
	      bytes, offset, length, stripBom, replacementCodepoint);
	  return new String.fromCharCodes(decoder.decodeRest());
	}

	88

	/**
	 * Produces a list of UTF-32 encoded bytes for [str].
	 *
	 * The output is big-endian and is always prefixed with a big-endian
	 * byte-order marker; this simply delegates to [encodeUtf32be] with the
	 * BOM enabled.
	 */
	List<int> encodeUtf32(String str) {
	  return encodeUtf32be(str, true);
	}

	95

	/**
	 * Produces a list of UTF-32BE encoded bytes for [str].
	 *
	 * Each codepoint of [str] becomes four bytes, most significant byte
	 * first. By default no BOM is written; pass true for [writeBOM] to
	 * prefix the output with the four-byte big-endian byte-order marker.
	 */
	List<int> encodeUtf32be(String str, [bool writeBOM = false]) {
	  List<int> codepoints = stringToCodepoints(str);
	  int bomLength = writeBOM ? 4 : 0;
	  List<int> bytes = new List<int>(4 * codepoints.length + bomLength);
	  int pos = 0;
	  if (writeBOM) {
	    // Big-endian BOM: 00 00 HI LO.
	    bytes[0] = 0;
	    bytes[1] = 0;
	    bytes[2] = UNICODE_UTF_BOM_HI;
	    bytes[3] = UNICODE_UTF_BOM_LO;
	    pos = 4;
	  }
	  for (int codepoint in codepoints) {
	    bytes[pos] = (codepoint >> 24) & UNICODE_BYTE_ZERO_MASK;
	    bytes[pos + 1] = (codepoint >> 16) & UNICODE_BYTE_ZERO_MASK;
	    bytes[pos + 2] = (codepoint >> 8) & UNICODE_BYTE_ZERO_MASK;
	    bytes[pos + 3] = codepoint & UNICODE_BYTE_ZERO_MASK;
	    pos += 4;
	  }
	  return bytes;
	}

	119

	/**
	 * Produces a list of UTF-32LE encoded bytes for [str].
	 *
	 * Each codepoint of [str] becomes four bytes, least significant byte
	 * first. By default, this method produces UTF-32LE bytes with no BOM;
	 * pass true for [writeBOM] to prefix the output with the four-byte
	 * little-endian byte-order marker.
	 */
	List<int> encodeUtf32le(String str, [bool writeBOM = false]) {
	  List<int> utf32CodeUnits = stringToCodepoints(str);
	  List<int> encoding = new List<int>(4 * utf32CodeUnits.length +
	      (writeBOM ? 4 : 0));
	  int i = 0;
	  if (writeBOM) {
	    // Little-endian BOM: LO HI 00 00.
	    encoding[i++] = UNICODE_UTF_BOM_LO;
	    encoding[i++] = UNICODE_UTF_BOM_HI;
	    encoding[i++] = 0;
	    encoding[i++] = 0;
	  }
	  for (int unit in utf32CodeUnits) {
	    // Emit the least significant byte first.
	    encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK;
	    encoding[i++] = (unit >> 8) & UNICODE_BYTE_ZERO_MASK;
	    encoding[i++] = (unit >> 16) & UNICODE_BYTE_ZERO_MASK;
	    encoding[i++] = (unit >> 24) & UNICODE_BYTE_ZERO_MASK;
	  }
	  return encoding;
	}

	143

	/**
	 * Identifies whether a List of bytes starts (based on [offset]) with a
	 * UTF-32 byte-order marker (BOM) of either endianness.
	 *
	 * Returns true when either [hasUtf32beBom] or [hasUtf32leBom] matches
	 * the same [offset]/[length] range.
	 */
	bool hasUtf32Bom(
	    List<int> utf32EncodedBytes, [int offset = 0, int length]) {
	  return hasUtf32beBom(utf32EncodedBytes, offset, length) ||
	      hasUtf32leBom(utf32EncodedBytes, offset, length);
	}

	153

	/**
	 * Identifies whether a List of bytes starts (based on [offset]) with a
	 * big-endian byte-order marker (BOM), i.e. the bytes 00 00 HI LO.
	 *
	 * When [length] is given, the range ends at `offset + length`; otherwise
	 * it ends at the end of the list. Returns false when fewer than four
	 * bytes are available in that range.
	 */
	bool hasUtf32beBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
	  int end = (length == null) ? utf32EncodedBytes.length : offset + length;
	  if (offset + 4 > end) {
	    // Not enough room for a four-byte marker.
	    return false;
	  }
	  return utf32EncodedBytes[offset] == 0 &&
	      utf32EncodedBytes[offset + 1] == 0 &&
	      utf32EncodedBytes[offset + 2] == UNICODE_UTF_BOM_HI &&
	      utf32EncodedBytes[offset + 3] == UNICODE_UTF_BOM_LO;
	}

	165

	/**
	 * Identifies whether a List of bytes starts (based on [offset]) with a
	 * little-endian byte-order marker (BOM), i.e. the bytes LO HI 00 00.
	 *
	 * When [length] is given, the range ends at `offset + length`; otherwise
	 * it ends at the end of the list. Returns false when fewer than four
	 * bytes are available in that range.
	 */
	bool hasUtf32leBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
	  int end = (length == null) ? utf32EncodedBytes.length : offset + length;
	  if (offset + 4 > end) {
	    // Not enough room for a four-byte marker.
	    return false;
	  }
	  return utf32EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&
	      utf32EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI &&
	      utf32EncodedBytes[offset + 2] == 0 &&
	      utf32EncodedBytes[offset + 3] == 0;
	}

	177

	/**
	 * Signature of a zero-argument factory that creates a [Utf32BytesDecoder].
	 */
	typedef Utf32BytesDecoder Utf32BytesDecoderProvider();

	/**
	 * Return type of [decodeUtf32AsIterable] and variants.
	 *
	 * Each request for an iterator invokes [codeunitsProvider] to obtain a
	 * new decoder, and that decoder only translates bytes as the consumer
	 * advances it. (Note: results are not cached.)
	 */
	// TODO(floitsch): Consider removing the extend and switch to implements since
	// that's cheaper to allocate.
	class IterableUtf32Decoder extends IterableBase<int> {
	  /** Factory invoked once per requested iterator. */
	  final Utf32BytesDecoderProvider codeunitsProvider;

	  IterableUtf32Decoder._(this.codeunitsProvider);

	  Utf32BytesDecoder get iterator {
	    return codeunitsProvider();
	  }
	}

	194

	/**
	 * Abstract parent class that converts encoded bytes to codepoints.
	 *
	 * Subclasses supply [decode], which reads the next four bytes from
	 * [utf32EncodedBytesIterator] and combines them in the appropriate byte
	 * order. Positions, skips and backups exposed by this class are counted
	 * in codepoints (four bytes each).
	 */
	abstract class Utf32BytesDecoder implements ListRangeIterator {
	  // TODO(kevmoo): should this field be private?
	  final ListRangeIterator utf32EncodedBytesIterator;

	  /** Substitute for undecodable input; null means throw instead. */
	  final int replacementCodepoint;
	  int _current;

	  Utf32BytesDecoder._fromListRangeIterator(
	      this.utf32EncodedBytesIterator, this.replacementCodepoint);

	  /**
	   * Creates a decoder whose byte order is chosen from a leading BOM.
	   *
	   * A big-endian or little-endian BOM selects the matching decoder and is
	   * skipped; without a BOM the bytes are treated as big-endian.
	   */
	  factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [
	      int offset = 0, int length,
	      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
	    int span = (length != null) ? length : utf32EncodedBytes.length - offset;
	    if (hasUtf32beBom(utf32EncodedBytes, offset, span)) {
	      // Start past the BOM; the subclass must not try to strip it again.
	      return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, span - 4,
	          false, replacementCodepoint);
	    }
	    if (hasUtf32leBom(utf32EncodedBytes, offset, span)) {
	      return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, span - 4,
	          false, replacementCodepoint);
	    }
	    return new Utf32beBytesDecoder(utf32EncodedBytes, offset, span, false,
	        replacementCodepoint);
	  }

	  /** Decodes everything that is left and returns it as a list. */
	  List<int> decodeRest() {
	    List<int> decoded = new List<int>(remaining);
	    for (int next = 0; moveNext(); next++) {
	      decoded[next] = current;
	    }
	    return decoded;
	  }

	  int get current => _current;

	  /**
	   * Advances to the next codepoint, returning false at the end of input.
	   *
	   * A trailing group of fewer than four bytes, or a decoded value rejected
	   * by [_validCodepoint], yields [replacementCodepoint]; when that is null
	   * an ArgumentError is thrown instead.
	   */
	  bool moveNext() {
	    _current = null;
	    int bytesLeft = utf32EncodedBytesIterator.remaining;
	    if (bytesLeft == 0) {
	      return false;
	    }
	    if (bytesLeft < 4) {
	      // Truncated trailing group: consume it so iteration can terminate.
	      utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining);
	      return _replaceOrThrow();
	    }
	    int codepoint = decode();
	    if (!_validCodepoint(codepoint)) {
	      return _replaceOrThrow();
	    }
	    _current = codepoint;
	    return true;
	  }

	  /** Uses the replacement codepoint as current, or throws if it is null. */
	  bool _replaceOrThrow() {
	    if (replacementCodepoint == null) {
	      throw new ArgumentError(
	          "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");
	    }
	    _current = replacementCodepoint;
	    return true;
	  }

	  int get position {
	    return utf32EncodedBytesIterator.position ~/ 4;
	  }

	  void backup([int by = 1]) {
	    utf32EncodedBytesIterator.backup(4 * by);
	  }

	  // Rounds up so that a truncated trailing group counts as one codepoint.
	  int get remaining {
	    return (utf32EncodedBytesIterator.remaining + 3) ~/ 4;
	  }

	  void skip([int count = 1]) {
	    utf32EncodedBytesIterator.skip(4 * count);
	  }

	  /** Reads the next four bytes and returns the value they encode. */
	  int decode();
	}

	280

	/**
	 * Converts UTF-32BE encoded bytes to codepoints by grouping 4 bytes,
	 * most significant byte first, to produce the unicode codepoint.
	 */
	class Utf32beBytesDecoder extends Utf32BytesDecoder {
	  /**
	   * Creates a big-endian decoder over the [offset]/[length] range of
	   * [utf32EncodedBytes], skipping a leading big-endian BOM when
	   * [stripBom] is true.
	   */
	  Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
	      int length, bool stripBom = true,
	      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
	      : super._fromListRangeIterator(
	            (new ListRange(utf32EncodedBytes, offset, length)).iterator,
	            replacementCodepoint) {
	    if (stripBom) {
	      if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {
	        // skip() advances by one codepoint, i.e. the four BOM bytes.
	        skip();
	      }
	    }
	  }

	  int decode() {
	    final ListRangeIterator source = utf32EncodedBytesIterator;
	    source.moveNext();
	    int codepoint = source.current;
	    // Shift in the three remaining bytes, high to low.
	    for (int i = 0; i < 3; i++) {
	      source.moveNext();
	      codepoint = (codepoint << 8) + source.current;
	    }
	    return codepoint;
	  }
	}

	309

	/**
	 * Converts UTF-32LE encoded bytes to codepoints by grouping 4 bytes,
	 * least significant byte first, to produce the unicode codepoint.
	 */
	class Utf32leBytesDecoder extends Utf32BytesDecoder {
	  /**
	   * Creates a little-endian decoder over the [offset]/[length] range of
	   * [utf32EncodedBytes], skipping a leading little-endian BOM when
	   * [stripBom] is true.
	   */
	  Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
	      int length, bool stripBom = true,
	      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
	      super._fromListRangeIterator(
	          (new ListRange(utf32EncodedBytes, offset, length)).iterator,
	          replacementCodepoint) {
	    if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) {
	      // skip() advances by one codepoint, i.e. the four BOM bytes.
	      skip();
	    }
	  }

	  int decode() {
	    // Bytes arrive low to high, so each one is shifted further left.
	    utf32EncodedBytesIterator.moveNext();
	    int value = utf32EncodedBytesIterator.current;
	    utf32EncodedBytesIterator.moveNext();
	    value += (utf32EncodedBytesIterator.current << 8);
	    utf32EncodedBytesIterator.moveNext();
	    value += (utf32EncodedBytesIterator.current << 16);
	    utf32EncodedBytesIterator.moveNext();
	    value += (utf32EncodedBytesIterator.current << 24);
	    return value;
	  }
	}

	338

	/**
	 * Returns whether [codepoint] is accepted by the UTF-32 decoders.
	 *
	 * A value is accepted when it is non-negative and below
	 * UNICODE_UTF16_RESERVED_LO, or above UNICODE_UTF16_RESERVED_HI and
	 * below UNICODE_VALID_RANGE_MAX.
	 */
	// NOTE(review): the upper bound is exclusive. If UNICODE_VALID_RANGE_MAX is
	// itself the highest legal codepoint, that value is rejected here -- confirm
	// against the constant's definition before changing.
	bool _validCodepoint(int codepoint) {
	  return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||
	      (codepoint > UNICODE_UTF16_RESERVED_HI &&
	          codepoint < UNICODE_VALID_RANGE_MAX);
	}

OLD	NEW

« no previous file with comments | « mojo/public/dart/third_party/utf/lib/src/utf/utf16.dart ('k') | mojo/public/dart/third_party/utf/lib/src/utf/utf8.dart » ('j') | no next file with comments »