pkg/third_party/html5lib/lib/src/encoding_parser.dart - Issue 814113004: Pull args, intl, logging, shelf, and source_maps out of the SDK.

Unified Diff: pkg/third_party/html5lib/lib/src/encoding_parser.dart

Issue 814113004: Pull args, intl, logging, shelf, and source_maps out of the SDK. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Also csslib. Created 6 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: pkg/third_party/html5lib/lib/src/encoding_parser.dart

diff --git a/pkg/third_party/html5lib/lib/src/encoding_parser.dart b/pkg/third_party/html5lib/lib/src/encoding_parser.dart

deleted file mode 100644

index 8bb861c426191e01fc6a24c92fc3fede64ecc949..0000000000000000000000000000000000000000

--- a/pkg/third_party/html5lib/lib/src/encoding_parser.dart

+++ /dev/null

@@ -1,378 +0,0 @@

-library encoding_parser;

-import 'dart:collection';

-import 'constants.dart';

-import 'inputstream.dart';

-// TODO(jmesserly): I converted StopIteration to StateError("No more elements").

-// Seems strange to throw this from outside of an iterator though.

-/// String-like object with an associated position and various extra methods.

-/// If the position ever reaches or passes the end of the string, a

-/// [StateError] is thrown.

-class EncodingBytes extends IterableBase<String> {

- final String _bytes;

- int _position = -1;

- EncodingBytes(this._bytes);

- Iterator<String> get iterator => _bytes.split('').iterator;

- int get length => _bytes.length;

- String next() {

- var p = _position = _position + 1;

- if (p >= length) {

- throw new StateError("No more elements");

- } else if (p < 0) {

- throw new RangeError(p);

- }

- return _bytes[p];

- }

- String previous() {

- var p = _position;

- if (p >= length) {

- throw new StateError("No more elements");

- } else if (p < 0) {

- throw new RangeError(p);

- }

- _position = p = p - 1;

- return _bytes[p];

- }

- set position(int value) {

- if (_position >= length) {

- throw new StateError("No more elements");

- }

- _position = value;

- }

- int get position {

- if (_position >= length) {

- throw new StateError("No more elements");

- }

- if (_position >= 0) {

- return _position;

- } else {

- return 0;

- }

- String get currentByte => _bytes[position];

- /// Skip past a list of characters. Defaults to skipping [isWhitespace].

- String skipChars([CharPreciate skipChars]) {

- if (skipChars == null) skipChars = isWhitespace;

- var p = position; // use property for the error-checking

- while (p < length) {

- var c = _bytes[p];

- if (!skipChars(c)) {

- _position = p;

- return c;

- }

- p += 1;

- }

- _position = p;

- return null;

- }

- String skipUntil(CharPreciate untilChars) {

- var p = position;

- while (p < length) {

- var c = _bytes[p];

- if (untilChars(c)) {

- _position = p;

- return c;

- }

- p += 1;

- }

- return null;

- }

- /// Look for a sequence of bytes at the start of a string. If the bytes

- /// are found return true and advance the position to the byte after the

- /// match. Otherwise return false and leave the position alone.

- bool matchBytes(String bytes) {

- var p = position;

- if (_bytes.length < p + bytes.length) {

- return false;

- }

- var data = _bytes.substring(p, p + bytes.length);

- if (data == bytes) {

- position += bytes.length;

- return true;

- }

- return false;

- }

- /// Look for the next sequence of bytes matching a given sequence. If

- /// a match is found, advance the position to the last byte of the match.

- bool jumpTo(String bytes) {

- var newPosition = _bytes.indexOf(bytes, position);

- if (newPosition >= 0) {

- _position = newPosition + bytes.length - 1;

- return true;

- } else {

- throw new StateError("No more elements");

- }

- String slice(int start, [int end]) {

- if (end == null) end = length;

- if (end < 0) end += length;

- return _bytes.substring(start, end - start);

- }

-/// Mini parser for detecting character encoding from meta elements.

-class EncodingParser {

- final EncodingBytes data;

- String encoding;

- /// [bytes] - the data to work on for encoding detection.

- EncodingParser(List<int> bytes)

- // Note: this is intentionally interpreting bytes as codepoints.

- : data = new EncodingBytes(new String.fromCharCodes(bytes).toLowerCase());

- String getEncoding() {

- final methodDispatch = [

- ["<!--", handleComment],

- ["<meta", handleMeta],

- ["</", handlePossibleEndTag],

- ["<!", handleOther],

- ["<?", handleOther],

- ["<", handlePossibleStartTag]];

- try {

- for (var byte in data) {

- var keepParsing = true;

- for (var dispatch in methodDispatch) {

- if (data.matchBytes(dispatch[0])) {

- try {

- keepParsing = dispatch[1]();

- break;

- } on StateError catch (e) {

- keepParsing = false;

- break;

- }

- if (!keepParsing) {

- break;

- }

- } on StateError catch (e) {

- // Catch this here to match behavior of Python's StopIteration

- }

- return encoding;

- }

- /// Skip over comments.

- bool handleComment() => data.jumpTo("-->");

- bool handleMeta() {

- if (!isWhitespace(data.currentByte)) {

- // We have "<meta" not followed by whitespace, so just keep going.

- return true;

- }

- // We have a valid meta element we want to search for attributes

- while (true) {

- // Try to find the next attribute after the current position

- var attr = getAttribute();

- if (attr == null) return true;

- if (attr[0] == "charset") {

- var tentativeEncoding = attr[1];

- var codec = codecName(tentativeEncoding);

- if (codec != null) {

- encoding = codec;

- return false;

- }

- } else if (attr[0] == "content") {

- var contentParser = new ContentAttrParser(new EncodingBytes(attr[1]));

- var tentativeEncoding = contentParser.parse();

- var codec = codecName(tentativeEncoding);

- if (codec != null) {

- encoding = codec;

- return false;

- }

- return true; // unreachable

- }

- bool handlePossibleStartTag() => handlePossibleTag(false);

- bool handlePossibleEndTag() {

- data.next();

- return handlePossibleTag(true);

- }

- bool handlePossibleTag(bool endTag) {

- if (!isLetter(data.currentByte)) {

- //If the next byte is not an ascii letter either ignore this

- //fragment (possible start tag case) or treat it according to

- //handleOther

- if (endTag) {

- data.previous();

- handleOther();

- }

- return true;

- }

- var c = data.skipUntil(isSpaceOrAngleBracket);

- if (c == "<") {

- // return to the first step in the overall "two step" algorithm

- // reprocessing the < byte

- data.previous();

- } else {

- //Read all attributes

- var attr = getAttribute();

- while (attr != null) {

- attr = getAttribute();

- }

- return true;

- }

- bool handleOther() => data.jumpTo(">");

- /// Return a name,value pair for the next attribute in the stream,

- /// if one is found, or null

- List<String> getAttribute() {

- // Step 1 (skip chars)

- var c = data.skipChars((x) => x == "/" || isWhitespace(x));

- // Step 2

- if (c == ">" || c == null) {

- return null;

- }

- // Step 3

- var attrName = [];

- var attrValue = [];

- // Step 4 attribute name

- while (true) {

- if (c == null) {

- return null;

- } else if (c == "=" && attrName.length > 0) {

- break;

- } else if (isWhitespace(c)) {

- // Step 6!

- c = data.skipChars();

- c = data.next();

- break;

- } else if (c == "/" || c == ">") {

- return [attrName.join(), ""];

- } else if (isLetter(c)) {

- attrName.add(c.toLowerCase());

- } else {

- attrName.add(c);

- }

- // Step 5

- c = data.next();

- }

- // Step 7

- if (c != "=") {

- data.previous();

- return [attrName.join(), ""];

- }

- // Step 8

- data.next();

- // Step 9

- c = data.skipChars();

- // Step 10

- if (c == "'" || c == '"') {

- // 10.1

- var quoteChar = c;

- while (true) {

- // 10.2

- c = data.next();

- if (c == quoteChar) {

- // 10.3

- data.next();

- return [attrName.join(), attrValue.join()];

- } else if (isLetter(c)) {

- // 10.4

- attrValue.add(c.toLowerCase());

- } else {

- // 10.5

- attrValue.add(c);

- }

- } else if (c == ">") {

- return [attrName.join(), ""];

- } else if (c == null) {

- return null;

- } else if (isLetter(c)) {

- attrValue.add(c.toLowerCase());

- } else {

- attrValue.add(c);

- }

- // Step 11

- while (true) {

- c = data.next();

- if (isSpaceOrAngleBracket(c)) {

- return [attrName.join(), attrValue.join()];

- } else if (c == null) {

- return null;

- } else if (isLetter(c)) {

- attrValue.add(c.toLowerCase());

- } else {

- attrValue.add(c);

- }

- return null; // unreachable

- }

-class ContentAttrParser {

- final EncodingBytes data;

- ContentAttrParser(this.data);

- String parse() {

- try {

- // Check if the attr name is charset

- // otherwise return

- data.jumpTo("charset");

- data.position += 1;

- data.skipChars();

- if (data.currentByte != "=") {

- // If there is no = sign keep looking for attrs

- return null;

- }

- data.position += 1;

- data.skipChars();

- // Look for an encoding between matching quote marks

- if (data.currentByte == '"' || data.currentByte == "'") {

- var quoteMark = data.currentByte;

- data.position += 1;

- var oldPosition = data.position;

- if (data.jumpTo(quoteMark)) {

- return data.slice(oldPosition, data.position);

- } else {

- return null;

- }

- } else {

- // Unquoted value

- var oldPosition = data.position;

- try {

- data.skipUntil(isWhitespace);

- return data.slice(oldPosition, data.position);

- } on StateError catch (e) {

- //Return the whole remaining value

- return data.slice(oldPosition);

- }

- } on StateError catch (e) {

- return null;

- }

-bool isSpaceOrAngleBracket(String char) {

- return char == ">" || char == "<" || isWhitespace(char);

-typedef bool CharPreciate(String char);

« no previous file with comments | « pkg/third_party/html5lib/lib/src/css_class_set.dart ('k') | pkg/third_party/html5lib/lib/src/inputstream.dart » ('j') | no next file with comments »