Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(82)

Unified Diff: pkg/third_party/html5lib/lib/src/tokenizer.dart

Issue 814113004: Pull args, intl, logging, shelf, and source_maps out of the SDK. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Also csslib. Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « pkg/third_party/html5lib/lib/src/token.dart ('k') | pkg/third_party/html5lib/lib/src/treebuilder.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: pkg/third_party/html5lib/lib/src/tokenizer.dart
diff --git a/pkg/third_party/html5lib/lib/src/tokenizer.dart b/pkg/third_party/html5lib/lib/src/tokenizer.dart
deleted file mode 100644
index 1b6311497ac4a3b4c188032a6d852a276c13804c..0000000000000000000000000000000000000000
--- a/pkg/third_party/html5lib/lib/src/tokenizer.dart
+++ /dev/null
@@ -1,1886 +0,0 @@
-library tokenizer;
-
-import 'dart:collection';
-import 'package:html5lib/parser.dart' show HtmlParser;
-import 'constants.dart';
-import 'inputstream.dart';
-import 'token.dart';
-import 'utils.dart';
-
-// Group entities by their first character, for faster lookups
-
-// TODO(jmesserly): we could use a better data structure here like a trie, if
-// we had it implemented in Dart.
-/// Entity names from [entities], grouped by their first character so entity
-/// lookups only have to scan candidates that can possibly match.
-Map<String, List<String>> entitiesByFirstChar = (() {
- // Use a typed literal: the untyped {} produced a Map<dynamic, dynamic>
- // even though the declared type is Map<String, List<String>>.
- var result = <String, List<String>>{};
- for (var k in entities.keys) {
- result.putIfAbsent(k[0], () => []).add(k);
- }
- return result;
-})();
-
-// TODO(jmesserly): lots of ways to make this faster:
-// - use char codes everywhere instead of 1-char strings
-// - use switch instead of contains, indexOf
-// - use switch instead of the sequential if tests
-// - avoid string concat
-
-/// This class takes care of tokenizing HTML.
-class HtmlTokenizer implements Iterator<Token> {
- // TODO(jmesserly): a lot of these could be made private
-
- /// The preprocessed input stream that characters are read from.
- final HtmlInputStream stream;
-
- /// Whether tag names are normalized to lower case when a tag is emitted.
- final bool lowercaseElementName;
-
- /// Whether attribute names are normalized to lower case when emitted.
- final bool lowercaseAttrName;
-
- /// True to generate spans in for [Token.span].
- final bool generateSpans;
-
- /// True to generate spans for attributes.
- final bool attributeSpans;
-
- /// This reference to the parser is used for correct CDATA handling.
- /// The [HtmlParser] will set this at construction time.
- HtmlParser parser;
-
- /// Tokens produced by the state functions but not yet consumed by
- /// [moveNext].
- final Queue<Token> tokenQueue;
-
- /// Holds the token that is currently being processed.
- Token currentToken;
-
- /// Holds a reference to the method to be invoked for the next parser state.
- // TODO(jmesserly): the type should be "Predicate" but a dart2js checked mode
- // bug prevents us from doing that. See http://dartbug.com/12465
- Function state;
-
- /// Scratch buffer used by the RCDATA/RAWTEXT/script-data end-tag and
- /// double-escape states to accumulate a candidate tag name.
- String temporaryBuffer;
-
- // End offset of the last emitted token; start of the next token's span.
- int _lastOffset;
-
- // TODO(jmesserly): ideally this would be a LinkedHashMap and we wouldn't add
- // an item until it's ready. But the code doesn't have a clear notion of when
- // it's "done" with the attribute.
- List<TagAttribute> _attributes;
- Set<String> _attributeNames;
-
- /// Creates a tokenizer reading from [doc] via an [HtmlInputStream].
- // NOTE(review): [doc] is forwarded straight to HtmlInputStream; the set of
- // accepted types (String/bytes) is defined there — confirm before
- // documenting it here.
- HtmlTokenizer(doc, {String encoding, bool parseMeta: true,
- this.lowercaseElementName: true, this.lowercaseAttrName: true,
- bool generateSpans: false, String sourceUrl, this.attributeSpans: false})
- : stream = new HtmlInputStream(
- doc, encoding, parseMeta, generateSpans, sourceUrl),
- tokenQueue = new Queue(),
- generateSpans = generateSpans {
- reset();
- }
-
- /// The current token viewed as a [TagToken]; only valid while a tag token
- /// is being built.
- TagToken get currentTagToken => currentToken;
- /// The current token viewed as a [DoctypeToken].
- DoctypeToken get currentDoctypeToken => currentToken;
- /// The current token viewed as a [StringToken].
- StringToken get currentStringToken => currentToken;
-
- /// The last token returned by [moveNext], per the [Iterator] contract.
- Token _current;
- Token get current => _current;
-
- /// Name of the attribute currently being parsed (the most recently added).
- String get _attributeName => _attributes.last.name;
- set _attributeName(String value) {
- _attributes.last.name = value;
- }
-
- /// Value of the attribute currently being parsed.
- String get _attributeValue => _attributes.last.value;
- set _attributeValue(String value) {
- _attributes.last.value = value;
- }
-
- /// Records the end offset of the current attribute, [offset] characters
- /// relative to the current stream position. No-op unless [attributeSpans].
- void _markAttributeEnd(int offset) {
- if (attributeSpans) _attributes.last.end = stream.position + offset;
- }
-
- /// Records where the current attribute's value starts.
- void _markAttributeValueStart(int offset) {
- if (attributeSpans) _attributes.last.startValue = stream.position + offset;
- }
-
- /// Records where the current attribute's value ends; ending the value also
- /// ends the attribute itself.
- void _markAttributeValueEnd(int offset) {
- if (attributeSpans) {
- _attributes.last.endValue = stream.position + offset;
- _markAttributeEnd(offset);
- }
- }
-
- // Note: we could track the name span here, if we need it.
- void _markAttributeNameEnd(int offset) => _markAttributeEnd(offset);
-
- /// Starts a new attribute named [name] on the tag currently being built.
- void _addAttribute(String name) {
- if (_attributes == null) _attributes = [];
- var attr = new TagAttribute(name);
- _attributes.add(attr);
- // The name was already consumed from the stream, so back up from the
- // current position to find the attribute's start offset.
- if (attributeSpans) attr.start = stream.position - name.length;
- }
-
- /// This is where the magic happens.
- ///
- /// We do our usual processing through the states and when we have a token
- /// to return we yield the token which pauses processing until the next token
- /// is requested.
- bool moveNext() {
- // Start processing. When EOF is reached state will return false;
- // instead of true and the loop will terminate.
- while (stream.errors.length == 0 && tokenQueue.length == 0) {
- if (!state()) {
- _current = null;
- return false;
- }
- }
- // Stream-level errors take precedence over queued tokens.
- if (stream.errors.length > 0) {
- _current = new ParseErrorToken(stream.errors.removeFirst());
- } else {
- assert (tokenQueue.length > 0);
- _current = tokenQueue.removeFirst();
- }
- return true;
- }
-
- /// Returns the tokenizer to its initial state.
- ///
- /// The [stream] and the [parser] are deliberately left untouched; only the
- /// internal bookkeeping (pending tokens, span offset, attribute scratch
- /// state, and the state function) is cleared.
- void reset() {
- state = dataState;
- _lastOffset = 0;
- currentToken = null;
- temporaryBuffer = null;
- _attributeNames = null;
- _attributes = null;
- tokenQueue.clear();
- }
-
- /// Adds a token to the queue. Sets the span if needed.
- void _addToken(Token token) {
- if (generateSpans && token.span == null) {
- int offset = stream.position;
- token.span = stream.fileInfo.span(_lastOffset, offset);
- // Error tokens do not advance the span start, so the next real token's
- // span still begins where the previous real token ended.
- if (token is! ParseErrorToken) {
- _lastOffset = offset;
- }
- }
- tokenQueue.add(token);
- }
-
- /// This function returns either U+FFFD or the character based on the
- /// decimal or hexadecimal representation. It also discards ";" if present.
- /// If not present it will add a [ParseErrorToken].
- String consumeNumberEntity(bool isHex) {
- var allowed = isDigit;
- var radix = 10;
- if (isHex) {
- allowed = isHexDigit;
- radix = 16;
- }
-
- var charStack = [];
-
- // Consume all the characters that are in range while making sure we
- // don't hit an EOF.
- var c = stream.char();
- while (allowed(c) && c != EOF) {
- charStack.add(c);
- c = stream.char();
- }
-
- // Convert the set of characters consumed to an int.
- var charAsInt = parseIntRadix(charStack.join(), radix);
-
- // Certain characters get replaced with others
- // (per the spec's table of replacement code points).
- var char = replacementCharacters[charAsInt];
- if (char != null) {
- _addToken(new ParseErrorToken(
- "illegal-codepoint-for-numeric-entity",
- messageParams: {"charAsInt": charAsInt}));
- } else if ((0xD800 <= charAsInt && charAsInt <= 0xDFFF)
- || (charAsInt > 0x10FFFF)) {
- // Surrogates and out-of-range code points become U+FFFD.
- char = "\uFFFD";
- _addToken(new ParseErrorToken(
- "illegal-codepoint-for-numeric-entity",
- messageParams: {"charAsInt": charAsInt}));
- } else {
- // Should speed up this check somehow (e.g. move the set to a constant)
- // C0/C1 controls and non-characters are reported but still emitted.
- if ((0x0001 <= charAsInt && charAsInt <= 0x0008) ||
- (0x000E <= charAsInt && charAsInt <= 0x001F) ||
- (0x007F <= charAsInt && charAsInt <= 0x009F) ||
- (0xFDD0 <= charAsInt && charAsInt <= 0xFDEF) ||
- const [0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
- 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
- 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
- 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
- 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE,
- 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE,
- 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
- 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE,
- 0xFFFFF, 0x10FFFE, 0x10FFFF].contains(charAsInt)) {
- _addToken(new ParseErrorToken(
- "illegal-codepoint-for-numeric-entity",
- messageParams: {"charAsInt": charAsInt}));
- }
- char = new String.fromCharCodes([charAsInt]);
- }
-
- // Discard the ; if present. Otherwise, put it back on the queue and
- // invoke parseError on parser.
- if (c != ";") {
- _addToken(new ParseErrorToken(
- "numeric-entity-without-semicolon"));
- stream.unget(c);
- }
- return char;
- }
-
- /// Consumes a character reference following an "&".
- ///
- /// When [fromAttribute] is true the decoded text is appended to the current
- /// attribute value instead of being emitted as a token. [allowedChar] is the
- /// spec's "additional allowed character" (the attribute's quote character),
- /// after which no entity is processed.
- void consumeEntity({String allowedChar, bool fromAttribute: false}) {
- // Initialise to the default output for when no entity is matched
- var output = "&";
-
- var charStack = [stream.char()];
- if (isWhitespace(charStack[0]) || charStack[0] == '<' || charStack[0] == '&'
- || charStack[0] == EOF || allowedChar == charStack[0]) {
- // Not an entity at all; the "&" stands for itself.
- stream.unget(charStack[0]);
- } else if (charStack[0] == "#") {
- // Read the next character to see if it's hex or decimal
- bool hex = false;
- charStack.add(stream.char());
- if (charStack.last == 'x' || charStack.last == 'X') {
- hex = true;
- charStack.add(stream.char());
- }
-
- // charStack.last should be the first digit
- if (hex && isHexDigit(charStack.last) ||
- (!hex && isDigit(charStack.last))) {
- // At least one digit found, so consume the whole number
- stream.unget(charStack.last);
- output = consumeNumberEntity(hex);
- } else {
- // No digits found
- _addToken(new ParseErrorToken("expected-numeric-entity"));
- stream.unget(charStack.removeLast());
- output = "&${charStack.join()}";
- }
- } else {
- // At this point in the process might have named entity. Entities
- // are stored in the global variable "entities".
- //
- // Consume characters and compare to these to a substring of the
- // entity names in the list until the substring no longer matches.
- var filteredEntityList = entitiesByFirstChar[charStack[0]];
- if (filteredEntityList == null) filteredEntityList = const [];
-
- while (charStack.last != EOF) {
- var name = charStack.join();
- filteredEntityList = filteredEntityList.where(
- (e) => e.startsWith(name)).toList();
-
- if (filteredEntityList.length == 0) {
- break;
- }
- charStack.add(stream.char());
- }
-
- // At this point we have a string that starts with some characters
- // that may match an entity
- String entityName = null;
-
- // Try to find the longest entity the string will match to take care
- // of &noti for instance.
-
- int entityLen;
- for (entityLen = charStack.length - 1; entityLen > 1; entityLen--) {
- var possibleEntityName = charStack.sublist(0, entityLen).join();
- if (entities.containsKey(possibleEntityName)) {
- entityName = possibleEntityName;
- break;
- }
- }
-
- if (entityName != null) {
- var lastChar = entityName[entityName.length - 1];
- if (lastChar != ";") {
- _addToken(new ParseErrorToken(
- "named-entity-without-semicolon"));
- }
- // In attributes, a semicolon-less entity followed by an alphanumeric
- // or "=" is treated as literal text (e.g. "&amp=" in a URL query).
- if (lastChar != ";" && fromAttribute &&
- (isLetterOrDigit(charStack[entityLen]) ||
- charStack[entityLen] == '=')) {
- stream.unget(charStack.removeLast());
- output = "&${charStack.join()}";
- } else {
- // Emit the decoded entity plus any overconsumed trailing characters.
- output = entities[entityName];
- stream.unget(charStack.removeLast());
- output = '${output}${slice(charStack, entityLen).join()}';
- }
- } else {
- _addToken(new ParseErrorToken("expected-named-entity"));
- stream.unget(charStack.removeLast());
- output = "&${charStack.join()}";
- }
- }
- if (fromAttribute) {
- _attributeValue = '$_attributeValue$output';
- } else {
- var token;
- if (isWhitespace(output)) {
- token = new SpaceCharactersToken(output);
- } else {
- token = new CharactersToken(output);
- }
- _addToken(token);
- }
- }
-
- /// Consumes an entity occurring inside an attribute value, appending the
- /// decoded text to the current attribute. This replaces the need for a
- /// separate "entityInAttributeValueState".
- void processEntityInAttribute(String allowedChar) {
- consumeEntity(fromAttribute: true, allowedChar: allowedChar);
- }
-
- /// This method is a generic handler for emitting the tags. It also sets
- /// the state to "data" because that's what's needed after a token has been
- /// emitted.
- void emitCurrentToken() {
- var token = currentToken;
- // Add token to the queue to be yielded
- if (token is TagToken) {
- if (lowercaseElementName) {
- token.name = asciiUpper2Lower(token.name);
- }
- if (token is EndTagToken) {
- if (_attributes != null) {
- _addToken(new ParseErrorToken("attributes-in-end-tag"));
- }
- if (token.selfClosing) {
- // NOTE(review): "this-closing..." looks like a typo for
- // "self-closing-flag-on-end-tag", but error codes must match the
- // message keys in constants.dart — confirm there before renaming.
- _addToken(new ParseErrorToken("this-closing-flag-on-end-tag"));
- }
- } else if (token is StartTagToken) {
- // HTML5 specific normalizations to the token stream.
- // Convert the list into a map where first key wins.
- token.data = new LinkedHashMap<Object, String>();
- if (_attributes != null) {
- for (var attr in _attributes) {
- // putIfAbsent keeps the first occurrence of a duplicate attribute.
- token.data.putIfAbsent(attr.name, () => attr.value);
- }
- if (attributeSpans) token.attributeSpans = _attributes;
- }
- }
- // The attribute scratch state belongs only to the tag just emitted.
- _attributes = null;
- _attributeNames = null;
- }
- _addToken(token);
- state = dataState;
- }
-
- // Below are the various tokenizer states worked out.
-
- /// "Data" state: ordinary character data between tags.
- bool dataState() {
- var data = stream.char();
- if (data == "&") {
- state = entityDataState;
- } else if (data == "<") {
- state = tagOpenState;
- } else if (data == "\u0000") {
- // Unlike other states, the data state passes U+0000 through unchanged
- // (after reporting it) instead of replacing it with U+FFFD.
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\u0000"));
- } else if (data == EOF) {
- // Tokenization ends.
- return false;
- } else if (isWhitespace(data)) {
- // Directly after emitting a token you switch back to the "data
- // state". At that point spaceCharacters are important so they are
- // emitted separately.
- _addToken(new SpaceCharactersToken(
- '${data}${stream.charsUntil(spaceCharacters, true)}'));
- // No need to update lastFourChars here, since the first space will
- // have already been appended to lastFourChars and will have broken
- // any <!-- or --> sequences
- } else {
- // Batch up a run of ordinary characters into one token.
- var chars = stream.charsUntil("&<\u0000");
- _addToken(new CharactersToken('${data}${chars}'));
- }
- return true;
- }
-
- /// "Character reference in data" state: decode one entity, then return to
- /// the data state.
- bool entityDataState() {
- consumeEntity();
- state = dataState;
- return true;
- }
-
- /// "RCDATA" state: text content of elements like <title> and <textarea>,
- /// where entities are decoded but tags (other than the matching end tag)
- /// are not recognized.
- bool rcdataState() {
- var data = stream.char();
- if (data == "&") {
- state = characterReferenceInRcdata;
- } else if (data == "<") {
- state = rcdataLessThanSignState;
- } else if (data == EOF) {
- // Tokenization ends.
- return false;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\uFFFD"));
- } else if (isWhitespace(data)) {
- // Directly after emitting a token you switch back to the "data
- // state". At that point spaceCharacters are important so they are
- // emitted separately.
- _addToken(new SpaceCharactersToken(
- '${data}${stream.charsUntil(spaceCharacters, true)}'));
- } else {
- var chars = stream.charsUntil("&<");
- _addToken(new CharactersToken('${data}${chars}'));
- }
- return true;
- }
-
- /// "Character reference in RCDATA" state.
- bool characterReferenceInRcdata() {
- consumeEntity();
- state = rcdataState;
- return true;
- }
-
- /// "RAWTEXT" state: text content of elements like <style>, where neither
- /// entities nor tags are recognized.
- bool rawtextState() {
- var data = stream.char();
- if (data == "<") {
- state = rawtextLessThanSignState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\uFFFD"));
- } else if (data == EOF) {
- // Tokenization ends.
- return false;
- } else {
- var chars = stream.charsUntil("<\u0000");
- _addToken(new CharactersToken("${data}${chars}"));
- }
- return true;
- }
-
- /// "Script data" state: content of a <script> element.
- bool scriptDataState() {
- var data = stream.char();
- if (data == "<") {
- state = scriptDataLessThanSignState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\uFFFD"));
- } else if (data == EOF) {
- // Tokenization ends.
- return false;
- } else {
- var chars = stream.charsUntil("<\u0000");
- _addToken(new CharactersToken("${data}${chars}"));
- }
- return true;
- }
-
- /// "PLAINTEXT" state: everything to EOF is character data; there is no way
- /// to leave this state.
- bool plaintextState() {
- var data = stream.char();
- if (data == EOF) {
- // Tokenization ends.
- return false;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\uFFFD"));
- } else {
- _addToken(new CharactersToken(
- '${data}${stream.charsUntil("\u0000")}'));
- }
- return true;
- }
-
- /// "Tag open" state: just saw "<"; decide between markup declaration,
- /// end tag, start tag, or literal text.
- bool tagOpenState() {
- var data = stream.char();
- if (data == "!") {
- state = markupDeclarationOpenState;
- } else if (data == "/") {
- state = closeTagOpenState;
- } else if (isLetter(data)) {
- currentToken = new StartTagToken(data);
- state = tagNameState;
- } else if (data == ">") {
- // XXX In theory it could be something besides a tag name. But
- // do we really care?
- _addToken(new ParseErrorToken(
- "expected-tag-name-but-got-right-bracket"));
- _addToken(new CharactersToken("<>"));
- state = dataState;
- } else if (data == "?") {
- // XXX In theory it could be something besides a tag name. But
- // do we really care?
- _addToken(new ParseErrorToken(
- "expected-tag-name-but-got-question-mark"));
- stream.unget(data);
- state = bogusCommentState;
- } else {
- // XXX
- _addToken(new ParseErrorToken("expected-tag-name"));
- _addToken(new CharactersToken("<"));
- stream.unget(data);
- state = dataState;
- }
- return true;
- }
-
- /// "End tag open" state: just saw "</".
- bool closeTagOpenState() {
- var data = stream.char();
- if (isLetter(data)) {
- currentToken = new EndTagToken(data);
- state = tagNameState;
- } else if (data == ">") {
- _addToken(new ParseErrorToken(
- "expected-closing-tag-but-got-right-bracket"));
- state = dataState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken(
- "expected-closing-tag-but-got-eof"));
- _addToken(new CharactersToken("</"));
- state = dataState;
- } else {
- // XXX data can be _'_...
- _addToken(new ParseErrorToken(
- "expected-closing-tag-but-got-char", messageParams: {"data": data}));
- stream.unget(data);
- state = bogusCommentState;
- }
- return true;
- }
-
- /// "Tag name" state: accumulating the name of the current tag token.
- bool tagNameState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- state = beforeAttributeNameState;
- } else if (data == ">") {
- emitCurrentToken();
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-tag-name"));
- state = dataState;
- } else if (data == "/") {
- state = selfClosingStartTagState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentTagToken.name = '${currentTagToken.name}\uFFFD';
- } else {
- currentTagToken.name = '${currentTagToken.name}$data';
- // (Don't use charsUntil here, because tag names are
- // very short and it's faster to not do anything fancy)
- }
- return true;
- }
-
- /// "RCDATA less-than sign" state: "<" seen inside RCDATA; only "</" can
- /// begin an end tag, anything else is literal text.
- bool rcdataLessThanSignState() {
- var data = stream.char();
- if (data == "/") {
- temporaryBuffer = "";
- state = rcdataEndTagOpenState;
- } else {
- _addToken(new CharactersToken("<"));
- stream.unget(data);
- state = rcdataState;
- }
- return true;
- }
-
- /// "RCDATA end tag open" state: start buffering a candidate end tag name.
- bool rcdataEndTagOpenState() {
- var data = stream.char();
- if (isLetter(data)) {
- temporaryBuffer = '${temporaryBuffer}$data';
- state = rcdataEndTagNameState;
- } else {
- _addToken(new CharactersToken("</"));
- stream.unget(data);
- state = rcdataState;
- }
- return true;
- }
-
- /// Whether the buffered end tag name in [temporaryBuffer] matches the tag
- /// token currently being processed, compared case-insensitively (the
- /// spec's "appropriate end tag token" check).
- bool _tokenIsAppropriate() {
- if (currentToken is! TagToken) return false;
- var tagName = currentTagToken.name.toLowerCase();
- return tagName == temporaryBuffer.toLowerCase();
- }
-
- /// "RCDATA end tag name" state: the buffered name only becomes a real end
- /// tag if it matches the open element (the "appropriate" check).
- bool rcdataEndTagNameState() {
- var appropriate = _tokenIsAppropriate();
- var data = stream.char();
- if (isWhitespace(data) && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- state = beforeAttributeNameState;
- } else if (data == "/" && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- state = selfClosingStartTagState;
- } else if (data == ">" && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- emitCurrentToken();
- state = dataState;
- } else if (isLetter(data)) {
- temporaryBuffer = '${temporaryBuffer}$data';
- } else {
- // Not an end tag after all: re-emit what we buffered as text.
- _addToken(new CharactersToken("</$temporaryBuffer"));
- stream.unget(data);
- state = rcdataState;
- }
- return true;
- }
-
- /// "RAWTEXT less-than sign" state.
- bool rawtextLessThanSignState() {
- var data = stream.char();
- if (data == "/") {
- temporaryBuffer = "";
- state = rawtextEndTagOpenState;
- } else {
- _addToken(new CharactersToken("<"));
- stream.unget(data);
- state = rawtextState;
- }
- return true;
- }
-
- /// "RAWTEXT end tag open" state: start buffering a candidate end tag name.
- bool rawtextEndTagOpenState() {
- var data = stream.char();
- if (isLetter(data)) {
- temporaryBuffer = '${temporaryBuffer}$data';
- state = rawtextEndTagNameState;
- } else {
- _addToken(new CharactersToken("</"));
- stream.unget(data);
- state = rawtextState;
- }
- return true;
- }
-
- /// "RAWTEXT end tag name" state; mirrors [rcdataEndTagNameState].
- bool rawtextEndTagNameState() {
- var appropriate = _tokenIsAppropriate();
- var data = stream.char();
- if (isWhitespace(data) && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- state = beforeAttributeNameState;
- } else if (data == "/" && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- state = selfClosingStartTagState;
- } else if (data == ">" && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- emitCurrentToken();
- state = dataState;
- } else if (isLetter(data)) {
- temporaryBuffer = '${temporaryBuffer}$data';
- } else {
- // Not an end tag after all: re-emit what we buffered as text.
- _addToken(new CharactersToken("</$temporaryBuffer"));
- stream.unget(data);
- state = rawtextState;
- }
- return true;
- }
-
- /// "Script data less-than sign" state: "</" may start an end tag and "<!"
- /// may start an escaped (commented) section.
- bool scriptDataLessThanSignState() {
- var data = stream.char();
- if (data == "/") {
- temporaryBuffer = "";
- state = scriptDataEndTagOpenState;
- } else if (data == "!") {
- _addToken(new CharactersToken("<!"));
- state = scriptDataEscapeStartState;
- } else {
- _addToken(new CharactersToken("<"));
- stream.unget(data);
- state = scriptDataState;
- }
- return true;
- }
-
- /// "Script data end tag open" state: start buffering a candidate end tag.
- bool scriptDataEndTagOpenState() {
- var data = stream.char();
- if (isLetter(data)) {
- temporaryBuffer = '${temporaryBuffer}$data';
- state = scriptDataEndTagNameState;
- } else {
- _addToken(new CharactersToken("</"));
- stream.unget(data);
- state = scriptDataState;
- }
- return true;
- }
-
- /// "Script data end tag name" state; mirrors [rcdataEndTagNameState].
- bool scriptDataEndTagNameState() {
- var appropriate = _tokenIsAppropriate();
- var data = stream.char();
- if (isWhitespace(data) && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- state = beforeAttributeNameState;
- } else if (data == "/" && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- state = selfClosingStartTagState;
- } else if (data == ">" && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- emitCurrentToken();
- state = dataState;
- } else if (isLetter(data)) {
- temporaryBuffer = '${temporaryBuffer}$data';
- } else {
- // Not an end tag after all: re-emit what we buffered as text.
- _addToken(new CharactersToken("</$temporaryBuffer"));
- stream.unget(data);
- state = scriptDataState;
- }
- return true;
- }
-
- /// "Script data escape start" state: after "<!", a "-" may begin a
- /// "<!--" escaped section.
- bool scriptDataEscapeStartState() {
- var next = stream.char();
- if (next != "-") {
- // Not an escape sequence; reprocess the character as script data.
- stream.unget(next);
- state = scriptDataState;
- return true;
- }
- _addToken(new CharactersToken("-"));
- state = scriptDataEscapeStartDashState;
- return true;
- }
-
- /// "Script data escape start dash" state: after "<!-", a second "-"
- /// completes the "<!--" opener and enters the escaped section.
- bool scriptDataEscapeStartDashState() {
- var next = stream.char();
- if (next != "-") {
- // Only one dash; reprocess the character as script data.
- stream.unget(next);
- state = scriptDataState;
- return true;
- }
- _addToken(new CharactersToken("-"));
- state = scriptDataEscapedDashDashState;
- return true;
- }
-
- /// "Script data escaped" state: inside a "<!-- ... -->" section of script.
- bool scriptDataEscapedState() {
- var data = stream.char();
- if (data == "-") {
- _addToken(new CharactersToken("-"));
- state = scriptDataEscapedDashState;
- } else if (data == "<") {
- state = scriptDataEscapedLessThanSignState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\uFFFD"));
- } else if (data == EOF) {
- state = dataState;
- } else {
- var chars = stream.charsUntil("<-\u0000");
- _addToken(new CharactersToken("${data}${chars}"));
- }
- return true;
- }
-
- /// "Script data escaped dash" state: one "-" seen in the escaped section.
- bool scriptDataEscapedDashState() {
- var data = stream.char();
- if (data == "-") {
- _addToken(new CharactersToken("-"));
- state = scriptDataEscapedDashDashState;
- } else if (data == "<") {
- state = scriptDataEscapedLessThanSignState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\uFFFD"));
- state = scriptDataEscapedState;
- } else if (data == EOF) {
- state = dataState;
- } else {
- _addToken(new CharactersToken(data));
- state = scriptDataEscapedState;
- }
- return true;
- }
-
- /// "Script data escaped dash dash" state: "--" seen; a ">" now closes the
- /// escaped section.
- bool scriptDataEscapedDashDashState() {
- var data = stream.char();
- if (data == "-") {
- _addToken(new CharactersToken("-"));
- } else if (data == "<") {
- state = scriptDataEscapedLessThanSignState;
- } else if (data == ">") {
- _addToken(new CharactersToken(">"));
- state = scriptDataState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\uFFFD"));
- state = scriptDataEscapedState;
- } else if (data == EOF) {
- state = dataState;
- } else {
- _addToken(new CharactersToken(data));
- state = scriptDataEscapedState;
- }
- return true;
- }
-
- /// "Script data escaped less-than sign" state: "<" inside an escaped
- /// section; may start an end tag or a double-escaped "<script".
- bool scriptDataEscapedLessThanSignState() {
- var data = stream.char();
- if (data == "/") {
- temporaryBuffer = "";
- state = scriptDataEscapedEndTagOpenState;
- } else if (isLetter(data)) {
- _addToken(new CharactersToken("<$data"));
- temporaryBuffer = data;
- state = scriptDataDoubleEscapeStartState;
- } else {
- _addToken(new CharactersToken("<"));
- stream.unget(data);
- state = scriptDataEscapedState;
- }
- return true;
- }
-
- /// "Script data escaped end tag open" state.
- bool scriptDataEscapedEndTagOpenState() {
- var data = stream.char();
- if (isLetter(data)) {
- temporaryBuffer = data;
- state = scriptDataEscapedEndTagNameState;
- } else {
- _addToken(new CharactersToken("</"));
- stream.unget(data);
- state = scriptDataEscapedState;
- }
- return true;
- }
-
- /// "Script data escaped end tag name" state; mirrors
- /// [rcdataEndTagNameState] but within the escaped section.
- bool scriptDataEscapedEndTagNameState() {
- var appropriate = _tokenIsAppropriate();
- var data = stream.char();
- if (isWhitespace(data) && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- state = beforeAttributeNameState;
- } else if (data == "/" && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- state = selfClosingStartTagState;
- } else if (data == ">" && appropriate) {
- currentToken = new EndTagToken(temporaryBuffer);
- emitCurrentToken();
- state = dataState;
- } else if (isLetter(data)) {
- temporaryBuffer = '${temporaryBuffer}$data';
- } else {
- // Not an end tag after all: re-emit what we buffered as text.
- _addToken(new CharactersToken("</$temporaryBuffer"));
- stream.unget(data);
- state = scriptDataEscapedState;
- }
- return true;
- }
-
- /// "Script data double escape start" state: decide whether a "<script"
- /// inside an escaped section enters the double-escaped state.
- bool scriptDataDoubleEscapeStartState() {
- var data = stream.char();
- if (isWhitespace(data) || data == "/" || data == ">") {
- _addToken(new CharactersToken(data));
- if (temporaryBuffer.toLowerCase() == "script") {
- state = scriptDataDoubleEscapedState;
- } else {
- state = scriptDataEscapedState;
- }
- } else if (isLetter(data)) {
- _addToken(new CharactersToken(data));
- temporaryBuffer = '${temporaryBuffer}$data';
- } else {
- stream.unget(data);
- state = scriptDataEscapedState;
- }
- return true;
- }
-
- /// "Script data double escaped" state: inside a nested "<script>" within
- /// an escaped section; everything is emitted as characters.
- bool scriptDataDoubleEscapedState() {
- var data = stream.char();
- if (data == "-") {
- _addToken(new CharactersToken("-"));
- state = scriptDataDoubleEscapedDashState;
- } else if (data == "<") {
- _addToken(new CharactersToken("<"));
- state = scriptDataDoubleEscapedLessThanSignState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\uFFFD"));
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-script-in-script"));
- state = dataState;
- } else {
- _addToken(new CharactersToken(data));
- }
- return true;
- }
-
- /// "Script data double escaped dash" state.
- bool scriptDataDoubleEscapedDashState() {
- var data = stream.char();
- if (data == "-") {
- _addToken(new CharactersToken("-"));
- state = scriptDataDoubleEscapedDashDashState;
- } else if (data == "<") {
- _addToken(new CharactersToken("<"));
- state = scriptDataDoubleEscapedLessThanSignState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\uFFFD"));
- state = scriptDataDoubleEscapedState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-script-in-script"));
- state = dataState;
- } else {
- _addToken(new CharactersToken(data));
- state = scriptDataDoubleEscapedState;
- }
- return true;
- }
-
- // TODO(jmesserly): report bug in original code
- // (was "Dash" instead of "DashDash")
- /// "Script data double escaped dash dash" state: "--" seen; a ">" closes
- /// the outer escaped section.
- bool scriptDataDoubleEscapedDashDashState() {
- var data = stream.char();
- if (data == "-") {
- _addToken(new CharactersToken("-"));
- } else if (data == "<") {
- _addToken(new CharactersToken("<"));
- state = scriptDataDoubleEscapedLessThanSignState;
- } else if (data == ">") {
- _addToken(new CharactersToken(">"));
- state = scriptDataState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addToken(new CharactersToken("\uFFFD"));
- state = scriptDataDoubleEscapedState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-script-in-script"));
- state = dataState;
- } else {
- _addToken(new CharactersToken(data));
- state = scriptDataDoubleEscapedState;
- }
- return true;
- }
-
- /// "Script data double escaped less-than sign" state: only "</" may start
- /// the exit sequence from the double-escaped state.
- bool scriptDataDoubleEscapedLessThanSignState() {
- var data = stream.char();
- if (data == "/") {
- _addToken(new CharactersToken("/"));
- temporaryBuffer = "";
- state = scriptDataDoubleEscapeEndState;
- } else {
- stream.unget(data);
- state = scriptDataDoubleEscapedState;
- }
- return true;
- }
-
- /// "Script data double escape end" state: a "</script" exits back to the
- /// (single) escaped state.
- bool scriptDataDoubleEscapeEndState() {
- var data = stream.char();
- if (isWhitespace(data) || data == "/" || data == ">") {
- _addToken(new CharactersToken(data));
- if (temporaryBuffer.toLowerCase() == "script") {
- state = scriptDataEscapedState;
- } else {
- state = scriptDataDoubleEscapedState;
- }
- } else if (isLetter(data)) {
- _addToken(new CharactersToken(data));
- temporaryBuffer = '${temporaryBuffer}$data';
- } else {
- stream.unget(data);
- state = scriptDataDoubleEscapedState;
- }
- return true;
- }
-
- /// "Before attribute name" state: between a tag name (or an attribute)
- /// and the next attribute.
- bool beforeAttributeNameState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- // Skip runs of whitespace in one go.
- stream.charsUntil(spaceCharacters, true);
- } else if (isLetter(data)) {
- _addAttribute(data);
- state = attributeNameState;
- } else if (data == ">") {
- emitCurrentToken();
- } else if (data == "/") {
- state = selfClosingStartTagState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("expected-attribute-name-but-got-eof"));
- state = dataState;
- } else if ("'\"=<".contains(data)) {
- // Reported as an error, but still starts an attribute with that name.
- _addToken(new ParseErrorToken("invalid-character-in-attribute-name"));
- _addAttribute(data);
- state = attributeNameState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addAttribute("\uFFFD");
- state = attributeNameState;
- } else {
- _addAttribute(data);
- state = attributeNameState;
- }
- return true;
- }
-
- /// "Attribute name" state: accumulating the current attribute's name.
- /// On leaving the state the name is normalized and checked for duplicates.
- bool attributeNameState() {
- var data = stream.char();
- bool leavingThisState = true;
- bool emitToken = false;
- if (data == "=") {
- state = beforeAttributeValueState;
- } else if (isLetter(data)) {
- // Batch up consecutive letters in one read.
- _attributeName = '$_attributeName$data'
- '${stream.charsUntil(asciiLetters, true)}';
- leavingThisState = false;
- } else if (data == ">") {
- // XXX If we emit here the attributes are converted to a dict
- // without being checked and when the code below runs we error
- // because data is a dict not a list
- emitToken = true;
- } else if (isWhitespace(data)) {
- state = afterAttributeNameState;
- } else if (data == "/") {
- state = selfClosingStartTagState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _attributeName = '${_attributeName}\uFFFD';
- leavingThisState = false;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-attribute-name"));
- state = dataState;
- } else if ("'\"<".contains(data)) {
- _addToken(new ParseErrorToken("invalid-character-in-attribute-name"));
- _attributeName = '$_attributeName$data';
- leavingThisState = false;
- } else {
- _attributeName = '$_attributeName$data';
- leavingThisState = false;
- }
-
- if (leavingThisState) {
- _markAttributeNameEnd(-1);
-
- // Attributes are not dropped at this stage. That happens when the
- // start tag token is emitted so values can still be safely appended
- // to attributes, but we do want to report the parse error in time.
- if (lowercaseAttrName) {
- _attributeName = asciiUpper2Lower(_attributeName);
- }
- if (_attributeNames == null) _attributeNames = new Set();
- if (_attributeNames.contains(_attributeName)) {
- _addToken(new ParseErrorToken("duplicate-attribute"));
- }
- _attributeNames.add(_attributeName);
-
- // XXX Fix for above XXX
- if (emitToken) {
- emitCurrentToken();
- }
- }
- return true;
- }
-
- /// HTML5 tokenizer "after attribute name" state: decides whether the next
- /// input starts a value (=), a new attribute, or ends the tag.
- bool afterAttributeNameState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- // Skip a whole run of whitespace at once.
- stream.charsUntil(spaceCharacters, true);
- } else if (data == "=") {
- state = beforeAttributeValueState;
- } else if (data == ">") {
- emitCurrentToken();
- } else if (isLetter(data)) {
- _addAttribute(data);
- state = attributeNameState;
- } else if (data == "/") {
- state = selfClosingStartTagState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _addAttribute("\uFFFD");
- state = attributeNameState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("expected-end-of-tag-but-got-eof"));
- state = dataState;
- } else if ("'\"<".contains(data)) {
- _addToken(new ParseErrorToken("invalid-character-after-attribute-name"));
- _addAttribute(data);
- state = attributeNameState;
- } else {
- _addAttribute(data);
- state = attributeNameState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "before attribute value" state: selects the quoted /
- /// unquoted value sub-state based on the first value character.
- bool beforeAttributeValueState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- stream.charsUntil(spaceCharacters, true);
- } else if (data == "\"") {
- _markAttributeValueStart(0);
- state = attributeValueDoubleQuotedState;
- } else if (data == "&") {
- // The "&" belongs to the (unquoted) value; push it back so the
- // unquoted state can run entity processing on it.
- state = attributeValueUnQuotedState;
- stream.unget(data);
- _markAttributeValueStart(0);
- } else if (data == "'") {
- _markAttributeValueStart(0);
- state = attributeValueSingleQuotedState;
- } else if (data == ">") {
- _addToken(new ParseErrorToken(
- "expected-attribute-value-but-got-right-bracket"));
- emitCurrentToken();
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _markAttributeValueStart(-1);
- _attributeValue = '${_attributeValue}\uFFFD';
- state = attributeValueUnQuotedState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("expected-attribute-value-but-got-eof"));
- state = dataState;
- } else if ("=<`".contains(data)) {
- // Parse error, but the character is still kept as value content.
- _addToken(new ParseErrorToken("equals-in-unquoted-attribute-value"));
- _markAttributeValueStart(-1);
- _attributeValue = '$_attributeValue$data';
- state = attributeValueUnQuotedState;
- } else {
- _markAttributeValueStart(-1);
- _attributeValue = '$_attributeValue$data';
- state = attributeValueUnQuotedState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "attribute value (double-quoted)" state: appends to
- /// _attributeValue until the closing '"', handling entities and NUL.
- bool attributeValueDoubleQuotedState() {
- var data = stream.char();
- if (data == "\"") {
- _markAttributeValueEnd(-1);
- _markAttributeEnd(0);
- state = afterAttributeValueState;
- } else if (data == "&") {
- // '"' is the "additional allowed character" for entities here.
- processEntityInAttribute('"');
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _attributeValue = '${_attributeValue}\uFFFD';
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-attribute-value-double-quote"));
- _markAttributeValueEnd(-1);
- state = dataState;
- } else {
- // Bulk-consume until the next quote or entity start.
- _attributeValue = '$_attributeValue$data${stream.charsUntil("\"&")}';
- }
- return true;
- }
-
- /// HTML5 tokenizer "attribute value (single-quoted)" state: mirror of the
- /// double-quoted state with "'" as the terminator.
- bool attributeValueSingleQuotedState() {
- var data = stream.char();
- if (data == "'") {
- _markAttributeValueEnd(-1);
- _markAttributeEnd(0);
- state = afterAttributeValueState;
- } else if (data == "&") {
- processEntityInAttribute("'");
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _attributeValue = '${_attributeValue}\uFFFD';
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-attribute-value-single-quote"));
- _markAttributeValueEnd(-1);
- state = dataState;
- } else {
- _attributeValue = '$_attributeValue$data${stream.charsUntil("\'&")}';
- }
- return true;
- }
-
- /// HTML5 tokenizer "attribute value (unquoted)" state: value ends at
- /// whitespace or '>'; several characters are parse errors but retained.
- bool attributeValueUnQuotedState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- _markAttributeValueEnd(-1);
- state = beforeAttributeNameState;
- } else if (data == "&") {
- processEntityInAttribute(">");
- } else if (data == ">") {
- _markAttributeValueEnd(-1);
- emitCurrentToken();
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-attribute-value-no-quotes"));
- _markAttributeValueEnd(-1);
- state = dataState;
- } else if ('"\'=<`'.contains(data)) {
- _addToken(new ParseErrorToken(
- "unexpected-character-in-unquoted-attribute-value"));
- _attributeValue = '$_attributeValue$data';
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- _attributeValue = '${_attributeValue}\uFFFD';
- } else {
- // Bulk-consume up to any character that needs special handling.
- _attributeValue = '$_attributeValue$data'
- '${stream.charsUntil("&>\"\'=<`$spaceCharacters")}';
- }
- return true;
- }
-
- /// HTML5 tokenizer "after attribute value (quoted)" state: only
- /// whitespace, '>', or '/' may legally follow a quoted value.
- bool afterAttributeValueState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- state = beforeAttributeNameState;
- } else if (data == ">") {
- emitCurrentToken();
- } else if (data == "/") {
- state = selfClosingStartTagState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("unexpected-EOF-after-attribute-value"));
- stream.unget(data);
- state = dataState;
- } else {
- // Unexpected character: reprocess it as a new attribute name.
- _addToken(new ParseErrorToken(
- "unexpected-character-after-attribute-value"));
- stream.unget(data);
- state = beforeAttributeNameState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "self-closing start tag" state: a '>' right after '/'
- /// marks the current tag token as self-closing and emits it.
- bool selfClosingStartTagState() {
- var data = stream.char();
- if (data == ">") {
- currentTagToken.selfClosing = true;
- emitCurrentToken();
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("unexpected-EOF-after-solidus-in-tag"));
- stream.unget(data);
- state = dataState;
- } else {
- // NOTE(review): "soldius" below looks like a typo for "solidus", but
- // this string is an error-code key — confirm against constants.dart
- // before renaming, or message lookup may break.
- _addToken(new ParseErrorToken(
- "unexpected-character-after-soldius-in-tag"));
- stream.unget(data);
- state = beforeAttributeNameState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "bogus comment" state: everything up to the next '>'
- /// (or EOF) becomes a comment token, with NULs replaced by U+FFFD.
- bool bogusCommentState() {
- // Make a new comment token and give it as value all the characters
- // until the first > or EOF (charsUntil checks for EOF automatically)
- // and emit it.
- var data = stream.charsUntil(">");
- data = data.replaceAll("\u0000", "\uFFFD");
- _addToken(new CommentToken(data));
-
- // Eat the character directly after the bogus comment which is either a
- // ">" or an EOF.
- stream.char();
- state = dataState;
- return true;
- }
-
- /// HTML5 tokenizer "markup declaration open" state: after "<!", looks
- /// ahead for "--" (comment), "doctype" (case-insensitive), or "[CDATA["
- /// (only in foreign content); anything else falls back to bogus comment,
- /// ungetting every character read during the lookahead.
- bool markupDeclarationOpenState() {
- var charStack = [stream.char()];
- if (charStack.last == "-") {
- charStack.add(stream.char());
- if (charStack.last == "-") {
- currentToken = new CommentToken("");
- state = commentStartState;
- return true;
- }
- } else if (charStack.last == 'd' || charStack.last == 'D') {
- // Match the remaining "octype" letters case-insensitively.
- var matched = true;
- for (var expected in const ['oO', 'cC', 'tT', 'yY', 'pP', 'eE']) {
- var char = stream.char();
- charStack.add(char);
- if (char == EOF || !expected.contains(char)) {
- matched = false;
- break;
- }
- }
- if (matched) {
- currentToken = new DoctypeToken(correct: true);
- state = doctypeState;
- return true;
- }
- } else if (charStack.last == "[" &&
- parser != null && parser.tree.openElements.length > 0 &&
- parser.tree.openElements.last.namespaceUri
- != parser.tree.defaultNamespace) {
- // CDATA sections are only recognized in foreign (SVG/MathML) content,
- // and "CDATA[" must match case-sensitively.
- var matched = true;
- for (var expected in const ["C", "D", "A", "T", "A", "["]) {
- charStack.add(stream.char());
- if (charStack.last != expected) {
- matched = false;
- break;
- }
- }
- if (matched) {
- state = cdataSectionState;
- return true;
- }
- }
-
- _addToken(new ParseErrorToken("expected-dashes-or-doctype"));
-
- // Push the lookahead back so bogusCommentState re-reads it.
- while (charStack.length > 0) {
- stream.unget(charStack.removeLast());
- }
- state = bogusCommentState;
- return true;
- }
-
- /// HTML5 tokenizer "comment start" state: entered right after "<!--".
- /// An immediate '>' is the empty-comment parse error.
- bool commentStartState() {
- var data = stream.char();
- if (data == "-") {
- state = commentStartDashState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentStringToken.data = '${currentStringToken.data}\uFFFD';
- } else if (data == ">") {
- _addToken(new ParseErrorToken("incorrect-comment"));
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-comment"));
- _addToken(currentToken);
- state = dataState;
- } else {
- currentStringToken.data = '${currentStringToken.data}$data';
- state = commentState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "comment start dash" state: one '-' seen after the
- /// comment opened; a second '-' heads toward comment end, anything else
- /// re-emits the buffered '-' as comment data.
- bool commentStartDashState() {
- var data = stream.char();
- if (data == "-") {
- state = commentEndState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentStringToken.data = '${currentStringToken.data}-\uFFFD';
- } else if (data == ">") {
- _addToken(new ParseErrorToken("incorrect-comment"));
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-comment"));
- _addToken(currentToken);
- state = dataState;
- } else {
- currentStringToken.data = '${currentStringToken.data}-${data}';
- state = commentState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "comment" state: accumulates comment data until a '-'
- /// (possible end marker), NUL, or EOF.
- bool commentState() {
- var data = stream.char();
- if (data == "-") {
- state = commentEndDashState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentStringToken.data = '${currentStringToken.data}\uFFFD';
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-comment"));
- _addToken(currentToken);
- state = dataState;
- } else {
- // Bulk-consume plain comment text up to the next special character.
- currentStringToken.data = '${currentStringToken.data}$data'
- '${stream.charsUntil("-\u0000")}';
- }
- return true;
- }
-
- /// HTML5 tokenizer "comment end dash" state: one '-' seen inside a
- /// comment; a second '-' may end it, otherwise the dash is data.
- bool commentEndDashState() {
- var data = stream.char();
- if (data == "-") {
- state = commentEndState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentStringToken.data = "${currentStringToken.data}-\uFFFD";
- state = commentState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-comment-end-dash"));
- _addToken(currentToken);
- state = dataState;
- } else {
- currentStringToken.data = "${currentStringToken.data}-${data}";
- state = commentState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "comment end" state: "--" seen; '>' closes the
- /// comment, other characters are parse errors with spec-defined recovery.
- bool commentEndState() {
- var data = stream.char();
- if (data == ">") {
- _addToken(currentToken);
- state = dataState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentStringToken.data = '${currentStringToken.data}--\uFFFD';
- state = commentState;
- } else if (data == "!") {
- _addToken(new ParseErrorToken(
- "unexpected-bang-after-double-dash-in-comment"));
- state = commentEndBangState;
- } else if (data == "-") {
- // Extra dash: report it but stay in this state (still "--" pending).
- _addToken(new ParseErrorToken(
- "unexpected-dash-after-double-dash-in-comment"));
- currentStringToken.data = '${currentStringToken.data}$data';
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-comment-double-dash"));
- _addToken(currentToken);
- state = dataState;
- } else {
- // XXX
- _addToken(new ParseErrorToken("unexpected-char-in-comment"));
- currentStringToken.data = "${currentStringToken.data}--${data}";
- state = commentState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "comment end bang" state: "--!" seen; '>' closes the
- /// comment, otherwise "--!" is folded back into the comment data.
- bool commentEndBangState() {
- var data = stream.char();
- if (data == ">") {
- _addToken(currentToken);
- state = dataState;
- } else if (data == "-") {
- currentStringToken.data = '${currentStringToken.data}--!';
- state = commentEndDashState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentStringToken.data = '${currentStringToken.data}--!\uFFFD';
- state = commentState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-comment-end-bang-state"));
- _addToken(currentToken);
- state = dataState;
- } else {
- currentStringToken.data = "${currentStringToken.data}--!${data}";
- state = commentState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "DOCTYPE" state: entered after "<!doctype"; expects
- /// whitespace before the doctype name.
- bool doctypeState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- state = beforeDoctypeNameState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken(
- "expected-doctype-name-but-got-eof"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- // Missing space is a parse error; reprocess the char as the name.
- _addToken(new ParseErrorToken("need-space-after-doctype"));
- stream.unget(data);
- state = beforeDoctypeNameState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "before DOCTYPE name" state: skips whitespace, then
- /// starts the doctype name (or errors on '>'/EOF).
- bool beforeDoctypeNameState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- return true;
- } else if (data == ">") {
- _addToken(new ParseErrorToken(
- "expected-doctype-name-but-got-right-bracket"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentDoctypeToken.name = "\uFFFD";
- state = doctypeNameState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken(
- "expected-doctype-name-but-got-eof"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- currentDoctypeToken.name = data;
- state = doctypeNameState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "DOCTYPE name" state: accumulates the name and
- /// lower-cases it when the name is terminated.
- bool doctypeNameState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- currentDoctypeToken.name = asciiUpper2Lower(currentDoctypeToken.name);
- state = afterDoctypeNameState;
- } else if (data == ">") {
- currentDoctypeToken.name = asciiUpper2Lower(currentDoctypeToken.name);
- _addToken(currentToken);
- state = dataState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentDoctypeToken.name = "${currentDoctypeToken.name}\uFFFD";
- state = doctypeNameState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype-name"));
- currentDoctypeToken.correct = false;
- currentDoctypeToken.name = asciiUpper2Lower(currentDoctypeToken.name);
- _addToken(currentToken);
- state = dataState;
- } else {
- currentDoctypeToken.name = '${currentDoctypeToken.name}$data';
- }
- return true;
- }
-
- /// HTML5 tokenizer "after DOCTYPE name" state: looks ahead for the
- /// case-insensitive keywords "PUBLIC" or "SYSTEM"; anything else makes
- /// the doctype bogus.
- bool afterDoctypeNameState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- return true;
- } else if (data == ">") {
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- currentDoctypeToken.correct = false;
- stream.unget(data);
- _addToken(new ParseErrorToken("eof-in-doctype"));
- _addToken(currentToken);
- state = dataState;
- } else {
- if (data == "p" || data == "P") {
- // TODO(jmesserly): would be nice to have a helper for this.
- // Match the remaining "UBLIC" letters case-insensitively.
- var matched = true;
- for (var expected in const ["uU", "bB", "lL", "iI", "cC"]) {
- data = stream.char();
- if (data == EOF || !expected.contains(data)) {
- matched = false;
- break;
- }
- }
- if (matched) {
- state = afterDoctypePublicKeywordState;
- return true;
- }
- } else if (data == "s" || data == "S") {
- // Match the remaining "YSTEM" letters case-insensitively.
- var matched = true;
- for (var expected in const ["yY", "sS", "tT", "eE", "mM"]) {
- data = stream.char();
- if (data == EOF || !expected.contains(data)) {
- matched = false;
- break;
- }
- }
- if (matched) {
- state = afterDoctypeSystemKeywordState;
- return true;
- }
- }
-
- // All the characters read before the current 'data' will be
- // [a-zA-Z], so they're garbage in the bogus doctype and can be
- // discarded; only the latest character might be '>' or EOF
- // and needs to be ungetted
- stream.unget(data);
- _addToken(new ParseErrorToken(
- "expected-space-or-right-bracket-in-doctype",
- messageParams: {"data": data}));
- currentDoctypeToken.correct = false;
- state = bogusDoctypeState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "after DOCTYPE public keyword" state: expects
- /// whitespace before the public identifier; a quote here is a parse
- /// error but is reprocessed.
- bool afterDoctypePublicKeywordState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- state = beforeDoctypePublicIdentifierState;
- } else if (data == "'" || data == '"') {
- _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
- stream.unget(data);
- state = beforeDoctypePublicIdentifierState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- stream.unget(data);
- state = beforeDoctypePublicIdentifierState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "before DOCTYPE public identifier" state: a quote
- /// opens the public identifier; anything else makes the doctype bogus.
- bool beforeDoctypePublicIdentifierState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- return true;
- } else if (data == "\"") {
- currentDoctypeToken.publicId = "";
- state = doctypePublicIdentifierDoubleQuotedState;
- } else if (data == "'") {
- currentDoctypeToken.publicId = "";
- state = doctypePublicIdentifierSingleQuotedState;
- } else if (data == ">") {
- _addToken(new ParseErrorToken("unexpected-end-of-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
- currentDoctypeToken.correct = false;
- state = bogusDoctypeState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "DOCTYPE public identifier (double-quoted)" state:
- /// accumulates publicId until the closing '"'.
- bool doctypePublicIdentifierDoubleQuotedState() {
- var data = stream.char();
- if (data == '"') {
- state = afterDoctypePublicIdentifierState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentDoctypeToken.publicId = "${currentDoctypeToken.publicId}\uFFFD";
- } else if (data == ">") {
- _addToken(new ParseErrorToken("unexpected-end-of-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- currentDoctypeToken.publicId = '${currentDoctypeToken.publicId}$data';
- }
- return true;
- }
-
- /// HTML5 tokenizer "DOCTYPE public identifier (single-quoted)" state:
- /// mirror of the double-quoted variant with "'" as terminator.
- bool doctypePublicIdentifierSingleQuotedState() {
- var data = stream.char();
- if (data == "'") {
- state = afterDoctypePublicIdentifierState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentDoctypeToken.publicId = "${currentDoctypeToken.publicId}\uFFFD";
- } else if (data == ">") {
- _addToken(new ParseErrorToken("unexpected-end-of-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- currentDoctypeToken.publicId = '${currentDoctypeToken.publicId}$data';
- }
- return true;
- }
-
- /// HTML5 tokenizer "after DOCTYPE public identifier" state: a quote
- /// here starts the system identifier directly (parse error), otherwise
- /// whitespace separates the two identifiers.
- bool afterDoctypePublicIdentifierState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- state = betweenDoctypePublicAndSystemIdentifiersState;
- } else if (data == ">") {
- _addToken(currentToken);
- state = dataState;
- } else if (data == '"') {
- _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
- currentDoctypeToken.systemId = "";
- state = doctypeSystemIdentifierDoubleQuotedState;
- } else if (data == "'") {
- _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
- currentDoctypeToken.systemId = "";
- state = doctypeSystemIdentifierSingleQuotedState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
- currentDoctypeToken.correct = false;
- state = bogusDoctypeState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "between DOCTYPE public and system identifiers"
- /// state: skips whitespace; a quote opens the system identifier.
- bool betweenDoctypePublicAndSystemIdentifiersState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- return true;
- } else if (data == ">") {
- _addToken(currentToken);
- state = dataState;
- } else if (data == '"') {
- currentDoctypeToken.systemId = "";
- state = doctypeSystemIdentifierDoubleQuotedState;
- } else if (data == "'") {
- currentDoctypeToken.systemId = "";
- state = doctypeSystemIdentifierSingleQuotedState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
- currentDoctypeToken.correct = false;
- state = bogusDoctypeState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "after DOCTYPE system keyword" state: mirror of the
- /// public-keyword variant, leading into the system identifier.
- bool afterDoctypeSystemKeywordState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- state = beforeDoctypeSystemIdentifierState;
- } else if (data == "'" || data == '"') {
- _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
- stream.unget(data);
- state = beforeDoctypeSystemIdentifierState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- stream.unget(data);
- state = beforeDoctypeSystemIdentifierState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "before DOCTYPE system identifier" state: a quote
- /// opens the system identifier; anything else makes the doctype bogus.
- bool beforeDoctypeSystemIdentifierState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- return true;
- } else if (data == "\"") {
- currentDoctypeToken.systemId = "";
- state = doctypeSystemIdentifierDoubleQuotedState;
- } else if (data == "'") {
- currentDoctypeToken.systemId = "";
- state = doctypeSystemIdentifierSingleQuotedState;
- } else if (data == ">") {
- _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
- currentDoctypeToken.correct = false;
- state = bogusDoctypeState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "DOCTYPE system identifier (double-quoted)" state:
- /// accumulates systemId until the closing '"'.
- bool doctypeSystemIdentifierDoubleQuotedState() {
- var data = stream.char();
- if (data == "\"") {
- state = afterDoctypeSystemIdentifierState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentDoctypeToken.systemId = "${currentDoctypeToken.systemId}\uFFFD";
- } else if (data == ">") {
- _addToken(new ParseErrorToken("unexpected-end-of-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- currentDoctypeToken.systemId = '${currentDoctypeToken.systemId}$data';
- }
- return true;
- }
-
- /// HTML5 tokenizer "DOCTYPE system identifier (single-quoted)" state:
- /// mirror of the double-quoted variant with "'" as terminator.
- bool doctypeSystemIdentifierSingleQuotedState() {
- var data = stream.char();
- if (data == "'") {
- state = afterDoctypeSystemIdentifierState;
- } else if (data == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- currentDoctypeToken.systemId = "${currentDoctypeToken.systemId}\uFFFD";
- } else if (data == ">") {
- _addToken(new ParseErrorToken("unexpected-end-of-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- currentDoctypeToken.systemId = '${currentDoctypeToken.systemId}$data';
- }
- return true;
- }
-
- /// HTML5 tokenizer "after DOCTYPE system identifier" state: only
- /// whitespace or '>' is legal; note the doctype is NOT marked incorrect
- /// for an unexpected char here (matches the spec's bogus transition).
- bool afterDoctypeSystemIdentifierState() {
- var data = stream.char();
- if (isWhitespace(data)) {
- return true;
- } else if (data == ">") {
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- _addToken(new ParseErrorToken("eof-in-doctype"));
- currentDoctypeToken.correct = false;
- _addToken(currentToken);
- state = dataState;
- } else {
- _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
- state = bogusDoctypeState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "bogus DOCTYPE" state: silently discards characters
- /// until '>' or EOF, then emits the (already-flagged) doctype token.
- bool bogusDoctypeState() {
- var data = stream.char();
- if (data == ">") {
- _addToken(currentToken);
- state = dataState;
- } else if (data == EOF) {
- // XXX EMIT
- stream.unget(data);
- _addToken(currentToken);
- state = dataState;
- }
- return true;
- }
-
- /// HTML5 tokenizer "CDATA section" state: collects raw characters until
- /// the "]]>" terminator (tracked via [matchedEnd]) or EOF, emitting the
- /// accumulated text as a single CharactersToken.
- bool cdataSectionState() {
- var data = [];
- // Counts how much of the "]]>" terminator has been matched so far.
- int matchedEnd = 0;
- while (true) {
- var ch = stream.char();
- if (ch == EOF) {
- break;
- }
- // Deal with null here rather than in the parser
- if (ch == "\u0000") {
- _addToken(new ParseErrorToken("invalid-codepoint"));
- ch = "\uFFFD";
- }
- data.add(ch);
- // TODO(jmesserly): it'd be nice if we had an easier way to match the end,
- // perhaps with a "peek" API.
- if (ch == "]" && matchedEnd < 2) {
- matchedEnd++;
- } else if (ch == ">" && matchedEnd == 2) {
- // Remove "]]>" from the end.
- data.removeLast();
- data.removeLast();
- data.removeLast();
- break;
- } else {
- matchedEnd = 0;
- }
- }
-
- if (data.length > 0) {
- _addToken(new CharactersToken(data.join()));
- }
- state = dataState;
- return true;
- }
-}
-
« no previous file with comments | « pkg/third_party/html5lib/lib/src/token.dart ('k') | pkg/third_party/html5lib/lib/src/treebuilder.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698