mojo/public/dart/third_party/html/lib/src/encoding_parser.dart - Issue 1346773002: Stop running pub get at gclient sync time and fix build bugs

Side by Side Diff: mojo/public/dart/third_party/html/lib/src/encoding_parser.dart

Issue 1346773002: Stop running pub get at gclient sync time and fix build bugs (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « mojo/public/dart/third_party/html/lib/src/css_class_set.dart ('k') | mojo/public/dart/third_party/html/lib/src/inputstream.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 library encoding_parser;

	2

	3 import 'constants.dart';

	4 import 'inputstream.dart';

	5

	6 // TODO(jmesserly): I converted StopIteration to StateError("No more elements").

	7 // Seems strange to throw this from outside of an iterator though.

	/// String-like object with an associated position and various extra methods.
	///
	/// The position starts just before the first character. Once it reaches or
	/// passes the end of the string, [next], [previous] and the [position]
	/// accessors throw a [StateError].
	class EncodingBytes {
	  final String _bytes;

	  /// Index of the current character. Starts at -1 so that the first call to
	  /// [next] returns the character at index 0.
	  int _position = -1;

	  EncodingBytes(this._bytes);

	  /// Number of characters in the underlying string.
	  int get length => _bytes.length;

	  /// Advances the position by one and returns the character found there.
	  ///
	  /// Throws a [StateError] if the new position is at or past the end.
	  String next() {
	    var p = _position = _position + 1;
	    if (p >= length) {
	      throw new StateError("No more elements");
	    } else if (p < 0) {
	      throw new RangeError(p);
	    }
	    return _bytes[p];
	  }

	  /// Moves the position back by one and returns the character found there.
	  ///
	  /// Throws a [StateError] if the position is already at or past the end, and
	  /// a [RangeError] if it is still before the first character.
	  String previous() {
	    var p = _position;
	    if (p >= length) {
	      throw new StateError("No more elements");
	    } else if (p < 0) {
	      throw new RangeError(p);
	    }
	    _position = p = p - 1;
	    return _bytes[p];
	  }

	  /// Sets the position to [value].
	  ///
	  /// The check is made against the position held *before* the assignment:
	  /// a [StateError] is thrown if that position is already at or past the end.
	  set position(int value) {
	    if (_position >= length) {
	      throw new StateError("No more elements");
	    }
	    _position = value;
	  }

	  /// The current position, reported as 0 while no character has been read yet.
	  ///
	  /// Throws a [StateError] if the position is at or past the end.
	  int get position {
	    if (_position >= length) {
	      throw new StateError("No more elements");
	    }
	    if (_position >= 0) {
	      return _position;
	    } else {
	      return 0;
	    }
	  }

	  /// The character at the current [position].
	  String get currentByte => _bytes[position];

	  /// Skip past a list of characters. Defaults to skipping [isWhitespace].
	  ///
	  /// Returns the first character that is not skipped and leaves the position
	  /// on it. If every remaining character is skipped, the position is moved to
	  /// the end of the string and null is returned.
	  String skipChars([CharPreciate skipChars]) {
	    if (skipChars == null) skipChars = isWhitespace;
	    var p = position; // use property for the error-checking
	    while (p < length) {
	      var c = _bytes[p];
	      if (!skipChars(c)) {
	        _position = p;
	        return c;
	      }
	      p += 1;
	    }
	    _position = p;
	    return null;
	  }

	  /// Advances to the first character, at or after the current position, that
	  /// [untilChars] accepts, and returns it.
	  ///
	  /// If no such character exists the position is moved to the end of the
	  /// string and null is returned, exactly as [skipChars] does. Reading
	  /// [position] afterwards therefore throws a [StateError], which lets callers
	  /// detect "ran off the end" instead of seeing an unchanged position.
	  String skipUntil(CharPreciate untilChars) {
	    var p = position; // use property for the error-checking
	    while (p < length) {
	      var c = _bytes[p];
	      if (untilChars(c)) {
	        _position = p;
	        return c;
	      }
	      p += 1;
	    }
	    // Nothing matched: record that the whole remainder was consumed.
	    _position = p;
	    return null;
	  }

	  /// Look for a sequence of bytes at the start of a string. If the bytes
	  /// are found return true and advance the position to the byte after the
	  /// match. Otherwise return false and leave the position alone.
	  bool matchBytes(String bytes) {
	    var p = position;
	    if (_bytes.length < p + bytes.length) {
	      return false;
	    }
	    var data = _bytes.substring(p, p + bytes.length);
	    if (data == bytes) {
	      position += bytes.length;
	      return true;
	    }
	    return false;
	  }

	  /// Look for the next sequence of bytes matching a given sequence. If
	  /// a match is found advance the position to the last byte of the match
	  /// and return true.
	  ///
	  /// Throws a [StateError] if there is no match; this method never returns
	  /// false.
	  bool jumpTo(String bytes) {
	    var newPosition = _bytes.indexOf(bytes, position);
	    if (newPosition >= 0) {
	      _position = newPosition + bytes.length - 1;
	      return true;
	    } else {
	      throw new StateError("No more elements");
	    }
	  }

	  /// Returns the characters from index [start] (inclusive) to [end]
	  /// (exclusive).
	  ///
	  /// [end] defaults to [length]; a negative [end] counts back from the end of
	  /// the string.
	  String slice(int start, [int end]) {
	    if (end == null) end = length;
	    if (end < 0) end += length;
	    // String.substring takes an end *index*, not a length.
	    return _bytes.substring(start, end);
	  }
	}

	123

	/// Mini parser for detecting character encoding from meta elements.
	///
	/// The input is lower-cased once up front and then scanned for `<meta`
	/// elements whose `charset` or `content` attribute names an encoding.
	class EncodingParser {
	  /// The lower-cased input being scanned.
	  final EncodingBytes data;

	  /// The encoding found so far; remains null until a meta element yields one.
	  String encoding;

	  /// [bytes] - the data to work on for encoding detection.
	  EncodingParser(List<int> bytes)
	      // Note: this is intentionally interpreting bytes as codepoints.
	      : data = new EncodingBytes(new String.fromCharCodes(bytes).toLowerCase());

	  /// Scans [data] and returns the detected encoding, or null if the input
	  /// runs out before one is found.
	  String getEncoding() {
	    // Order matters: the first matching prefix wins, so longer prefixes
	    // ("<!--", "<meta", "</") must come before the shorter ones they start
	    // with ("<!", "<").
	    final methodDispatch = [
	      ["<!--", handleComment],
	      ["<meta", handleMeta],
	      ["</", handlePossibleEndTag],
	      ["<!", handleOther],
	      ["<?", handleOther],
	      ["<", handlePossibleStartTag]
	    ];

	    try {
	      for (;;) {
	        for (var dispatch in methodDispatch) {
	          if (data.matchBytes(dispatch[0])) {
	            var keepParsing = dispatch[1]();
	            if (keepParsing) break;

	            // We found an encoding. Stop.
	            return encoding;
	          }
	        }
	        data.position += 1;
	      }
	    } on StateError {
	      // Catch this here to match behavior of Python's StopIteration
	      // TODO(jmesserly): refactor to not use exceptions
	    }
	    return encoding;
	  }

	  /// Skip over comments.
	  bool handleComment() => data.jumpTo("-->");

	  /// Handles the text following `<meta`.
	  ///
	  /// Returns false when an encoding was found and stored in [encoding], true
	  /// when scanning should continue.
	  bool handleMeta() {
	    if (!isWhitespace(data.currentByte)) {
	      // if we have <meta not followed by a space so just keep going
	      return true;
	    }
	    // We have a valid meta element we want to search for attributes
	    while (true) {
	      // Try to find the next attribute after the current position
	      var attr = getAttribute();
	      if (attr == null) return true;

	      if (attr[0] == "charset") {
	        var tentativeEncoding = attr[1];
	        // A null result from codecName is treated as "not a usable encoding".
	        var codec = codecName(tentativeEncoding);
	        if (codec != null) {
	          encoding = codec;
	          return false;
	        }
	      } else if (attr[0] == "content") {
	        var contentParser = new ContentAttrParser(new EncodingBytes(attr[1]));
	        var tentativeEncoding = contentParser.parse();
	        var codec = codecName(tentativeEncoding);
	        if (codec != null) {
	          encoding = codec;
	          return false;
	        }
	      }
	    }
	    return true; // unreachable
	  }

	  /// Handles the text following `<`.
	  bool handlePossibleStartTag() => handlePossibleTag(false);

	  /// Handles the text following `</`.
	  bool handlePossibleEndTag() {
	    data.next();
	    return handlePossibleTag(true);
	  }

	  /// Skips over a start or end tag, including its attributes. Always returns
	  /// true (keep parsing).
	  bool handlePossibleTag(bool endTag) {
	    if (!isLetter(data.currentByte)) {
	      //If the next byte is not an ascii letter either ignore this
	      //fragment (possible start tag case) or treat it according to
	      //handleOther
	      if (endTag) {
	        data.previous();
	        handleOther();
	      }
	      return true;
	    }

	    var c = data.skipUntil(isSpaceOrAngleBracket);
	    if (c == "<") {
	      // return to the first step in the overall "two step" algorithm
	      // reprocessing the < byte
	      data.previous();
	    } else {
	      //Read all attributes
	      var attr = getAttribute();
	      while (attr != null) {
	        attr = getAttribute();
	      }
	    }
	    return true;
	  }

	  /// Skips ahead to the next `>`.
	  bool handleOther() => data.jumpTo(">");

	  /// Return a name,value pair for the next attribute in the stream,
	  /// if one is found, or null
	  List<String> getAttribute() {
	    // Step 1 (skip chars)
	    var c = data.skipChars((x) => x == "/" || isWhitespace(x));
	    // Step 2
	    if (c == ">" || c == null) {
	      return null;
	    }
	    // Step 3
	    var attrName = <String>[];
	    var attrValue = <String>[];
	    // Step 4 attribute name
	    while (true) {
	      if (c == null) {
	        return null;
	      } else if (c == "=" && attrName.isNotEmpty) {
	        break;
	      } else if (isWhitespace(c)) {
	        // Step 6!
	        // Skip the whitespace after the name and stay on the first
	        // non-whitespace character, so that step 7 can see an "=" that
	        // follows the spaces (e.g. `charset = "utf-8"`).
	        c = data.skipChars();
	        break;
	      } else if (c == "/" || c == ">") {
	        return [attrName.join(), ""];
	      } else if (isLetter(c)) {
	        attrName.add(c.toLowerCase());
	      } else {
	        attrName.add(c);
	      }
	      // Step 5
	      c = data.next();
	    }
	    // Step 7
	    if (c != "=") {
	      data.previous();
	      return [attrName.join(), ""];
	    }
	    // Step 8
	    data.next();
	    // Step 9
	    c = data.skipChars();
	    // Step 10
	    if (c == "'" || c == '"') {
	      // 10.1
	      var quoteChar = c;
	      while (true) {
	        // 10.2
	        c = data.next();
	        if (c == quoteChar) {
	          // 10.3
	          data.next();
	          return [attrName.join(), attrValue.join()];
	        } else if (isLetter(c)) {
	          // 10.4
	          attrValue.add(c.toLowerCase());
	        } else {
	          // 10.5
	          attrValue.add(c);
	        }
	      }
	    } else if (c == ">") {
	      return [attrName.join(), ""];
	    } else if (c == null) {
	      return null;
	    } else if (isLetter(c)) {
	      attrValue.add(c.toLowerCase());
	    } else {
	      attrValue.add(c);
	    }
	    // Step 11
	    while (true) {
	      c = data.next();
	      if (isSpaceOrAngleBracket(c)) {
	        return [attrName.join(), attrValue.join()];
	      } else if (c == null) {
	        return null;
	      } else if (isLetter(c)) {
	        attrValue.add(c.toLowerCase());
	      } else {
	        attrValue.add(c);
	      }
	    }
	    return null; // unreachable
	  }
	}

	320

	/// Pulls the value that follows `charset` out of the text of a `content`
	/// attribute.
	class ContentAttrParser {
	  /// The attribute text being scanned.
	  final EncodingBytes data;

	  ContentAttrParser(this.data);

	  /// Returns the text after `charset=`, or null when it cannot be located.
	  ///
	  /// A value that starts with a quote mark runs up to the matching quote;
	  /// otherwise it runs up to the next whitespace character. Any [StateError]
	  /// raised while scanning outside the unquoted branch results in null.
	  String parse() {
	    try {
	      // Move onto the last character of "charset", then one past it.
	      data.jumpTo("charset");
	      data.position += 1;
	      data.skipChars();
	      // Without an "=" there is nothing to read.
	      if (data.currentByte != "=") return null;
	      data.position += 1;
	      data.skipChars();

	      var opener = data.currentByte;
	      if (opener == '"' || opener == "'") {
	        // Quoted value: everything between the opening quote and its match.
	        data.position += 1;
	        var quotedStart = data.position;
	        if (!data.jumpTo(opener)) return null;
	        return data.slice(quotedStart, data.position);
	      }

	      // Unquoted value: everything up to the next whitespace.
	      var bareStart = data.position;
	      try {
	        data.skipUntil(isWhitespace);
	        return data.slice(bareStart, data.position);
	      } on StateError {
	        // Ran off the end: hand back the whole remaining value.
	        return data.slice(bareStart);
	      }
	    } on StateError {
	      return null;
	    }
	  }
	}

	365

	/// Whether [char] is `<`, `>`, or a character accepted by [isWhitespace].
	bool isSpaceOrAngleBracket(String char) {
	  if (char == ">" || char == "<") {
	    return true;
	  }
	  return isWhitespace(char);
	}

	369

	/// Signature of a test applied to a single-character string; used by
	/// `EncodingBytes.skipChars` and `EncodingBytes.skipUntil` to decide which
	/// characters to skip or stop at.
	// NOTE(review): the name looks like a misspelling of "CharPredicate"; it is
	// left unchanged here because renaming it would affect every user of the type.
	370 typedef bool CharPreciate(String char);

OLD	NEW