| Index: third_party/pkg/html5lib/lib/src/tokenizer.dart
|
| diff --git a/third_party/pkg/html5lib/lib/src/tokenizer.dart b/third_party/pkg/html5lib/lib/src/tokenizer.dart
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..ee867b7cc7cc549e94439ec7f68133b706e44154
|
| --- /dev/null
|
| +++ b/third_party/pkg/html5lib/lib/src/tokenizer.dart
|
| @@ -0,0 +1,1900 @@
|
| +library tokenizer;
|
| +
|
| +import 'dart:collection';
|
| +import 'dart:math';
|
| +import 'package:html5lib/parser.dart' show HtmlParser;
|
| +import 'package:source_maps/span.dart' show Span, FileSpan;
|
| +import 'constants.dart';
|
| +import 'inputstream.dart';
|
| +import 'token.dart';
|
| +import 'utils.dart';
|
| +
|
| +// Group entities by their first character, for faster lookups
|
| +
|
| +// TODO(jmesserly): we could use a better data structure here like a trie, if
|
| +// we had it implemented in Dart.
|
| +Map<String, List<String>> entitiesByFirstChar = (() {
|
| + var result = {};
|
| + for (var k in entities.keys) {
|
| + result.putIfAbsent(k[0], () => []).add(k);
|
| + }
|
| + return result;
|
| +})();
|
| +
|
| +// TODO(jmesserly): lots of ways to make this faster:
|
| +// - use char codes everywhere instead of 1-char strings
|
| +// - use switch instead of contains, indexOf
|
| +// - use switch instead of the sequential if tests
|
| +// - avoid string concat
|
| +
|
| +/**
|
| + * This class takes care of tokenizing HTML.
|
| + */
|
| +class HtmlTokenizer implements Iterator<Token> {
|
| + // TODO(jmesserly): a lot of these could be made private
|
| +
|
| + final HtmlInputStream stream;
|
| +
|
| + final bool lowercaseElementName;
|
| +
|
| + final bool lowercaseAttrName;
|
| +
|
| +  /** True to generate spans for [Token.span]. */
|
| + final bool generateSpans;
|
| +
|
| + /** True to generate spans for attributes. */
|
| + final bool attributeSpans;
|
| +
|
| + /**
|
| + * This reference to the parser is used for correct CDATA handling.
|
| + * The [HtmlParser] will set this at construction time.
|
| + */
|
| + HtmlParser parser;
|
| +
|
| + final Queue<Token> tokenQueue;
|
| +
|
| + /** Holds the token that is currently being processed. */
|
| + Token currentToken;
|
| +
|
| + /**
|
| + * Holds a reference to the method to be invoked for the next parser state.
|
| + */
|
| + Predicate state;
|
| +
|
| + String temporaryBuffer;
|
| +
|
| + int _lastOffset;
|
| +
|
| + // TODO(jmesserly): ideally this would be a LinkedHashMap and we wouldn't add
|
| + // an item until it's ready. But the code doesn't have a clear notion of when
|
| + // it's "done" with the attribute.
|
| + List<TagAttribute> _attributes;
|
| + Set<String> _attributeNames;
|
| +
|
| + HtmlTokenizer(doc, {String encoding, bool parseMeta: true,
|
| + this.lowercaseElementName: true, this.lowercaseAttrName: true,
|
| + bool generateSpans: false, String sourceUrl, this.attributeSpans: false})
|
| + : stream = new HtmlInputStream(
|
| + doc, encoding, parseMeta, generateSpans, sourceUrl),
|
| + tokenQueue = new Queue(),
|
| + generateSpans = generateSpans {
|
| + reset();
|
| + }
|
| +
|
| + TagToken get currentTagToken => currentToken;
|
| + DoctypeToken get currentDoctypeToken => currentToken;
|
| + StringToken get currentStringToken => currentToken;
|
| +
|
| + Token _current;
|
| + Token get current => _current;
|
| +
|
| + String get _attributeName => _attributes.last.name;
|
| + set _attributeName(String value) {
|
| + _attributes.last.name = value;
|
| + }
|
| +
|
| + String get _attributeValue => _attributes.last.value;
|
| + set _attributeValue(String value) {
|
| + _attributes.last.value = value;
|
| + }
|
| +
|
| + void _markAttributeEnd(int offset) {
|
| + if (attributeSpans) _attributes.last.end = stream.position + offset;
|
| + }
|
| +
|
| + void _markAttributeValueStart(int offset) {
|
| + if (attributeSpans) _attributes.last.startValue = stream.position + offset;
|
| + }
|
| +
|
| + void _markAttributeValueEnd(int offset) {
|
| + if (attributeSpans) {
|
| + _attributes.last.endValue = stream.position + offset;
|
| + _markAttributeEnd(offset);
|
| + }
|
| + }
|
| +
|
| + // Note: we could track the name span here, if we need it.
|
| + void _markAttributeNameEnd(int offset) => _markAttributeEnd(offset);
|
| +
|
| + void _addAttribute(String name) {
|
| + if (_attributes == null) _attributes = [];
|
| + var attr = new TagAttribute(name);
|
| + _attributes.add(attr);
|
| + if (attributeSpans) attr.start = stream.position - name.length;
|
| + }
|
| +
|
| + /**
|
| + * This is where the magic happens.
|
| + *
|
| + * We do our usual processing through the states and when we have a token
|
| + * to return we yield the token which pauses processing until the next token
|
| + * is requested.
|
| + */
|
| + bool moveNext() {
|
| +    // Start processing. When EOF is reached, state will return false
|
| +    // instead of true and the loop will terminate.
|
| + while (stream.errors.length == 0 && tokenQueue.length == 0) {
|
| + if (!state()) {
|
| + _current = null;
|
| + return false;
|
| + }
|
| + }
|
| + if (stream.errors.length > 0) {
|
| + _current = new ParseErrorToken(stream.errors.removeFirst());
|
| + } else {
|
| + assert (tokenQueue.length > 0);
|
| + _current = tokenQueue.removeFirst();
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + /**
|
| + * Resets the tokenizer state. Calling this does not reset the [stream] or
|
| + * the [parser].
|
| + */
|
| + void reset() {
|
| + _lastOffset = 0;
|
| + tokenQueue.clear();
|
| + currentToken = null;
|
| + temporaryBuffer = null;
|
| + _attributes = null;
|
| + _attributeNames = null;
|
| + state = dataState;
|
| + }
|
| +
|
| + /** Adds a token to the queue. Sets the span if needed. */
|
| + void _addToken(Token token) {
|
| + if (generateSpans && token.span == null) {
|
| + int offset = stream.position;
|
| + token.span = new FileSpan(stream.fileInfo, _lastOffset, offset);
|
| + if (token is! ParseErrorToken) {
|
| + _lastOffset = offset;
|
| + }
|
| + }
|
| + tokenQueue.add(token);
|
| + }
|
| +
|
| + /**
|
| + * This function returns either U+FFFD or the character based on the
|
| + * decimal or hexadecimal representation. It also discards ";" if present.
|
| + * If not present it will add a [ParseErrorToken].
|
| + */
|
| + String consumeNumberEntity(bool isHex) {
|
| + var allowed = isDigit;
|
| + var radix = 10;
|
| + if (isHex) {
|
| + allowed = isHexDigit;
|
| + radix = 16;
|
| + }
|
| +
|
| + var charStack = [];
|
| +
|
| + // Consume all the characters that are in range while making sure we
|
| + // don't hit an EOF.
|
| + var c = stream.char();
|
| + while (allowed(c) && c != EOF) {
|
| + charStack.add(c);
|
| + c = stream.char();
|
| + }
|
| +
|
| + // Convert the set of characters consumed to an int.
|
| + var charAsInt = parseIntRadix(charStack.join(), radix);
|
| +
|
| + // Certain characters get replaced with others
|
| + var char = replacementCharacters[charAsInt];
|
| + if (char != null) {
|
| + _addToken(new ParseErrorToken(
|
| + "illegal-codepoint-for-numeric-entity",
|
| + messageParams: {"charAsInt": charAsInt}));
|
| + } else if ((0xD800 <= charAsInt && charAsInt <= 0xDFFF)
|
| + || (charAsInt > 0x10FFFF)) {
|
| + char = "\uFFFD";
|
| + _addToken(new ParseErrorToken(
|
| + "illegal-codepoint-for-numeric-entity",
|
| + messageParams: {"charAsInt": charAsInt}));
|
| + } else {
|
| + // Should speed up this check somehow (e.g. move the set to a constant)
|
| + if ((0x0001 <= charAsInt && charAsInt <= 0x0008) ||
|
| + (0x000E <= charAsInt && charAsInt <= 0x001F) ||
|
| + (0x007F <= charAsInt && charAsInt <= 0x009F) ||
|
| + (0xFDD0 <= charAsInt && charAsInt <= 0xFDEF) ||
|
| + const [0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
|
| + 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
| + 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
|
| + 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
|
| + 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE,
|
| + 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE,
|
| + 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
| + 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE,
|
| + 0xFFFFF, 0x10FFFE, 0x10FFFF].contains(charAsInt)) {
|
| + _addToken(new ParseErrorToken(
|
| + "illegal-codepoint-for-numeric-entity",
|
| + messageParams: {"charAsInt": charAsInt}));
|
| + }
|
| + char = new String.fromCharCodes([charAsInt]);
|
| + }
|
| +
|
| + // Discard the ; if present. Otherwise, put it back on the queue and
|
| + // invoke parseError on parser.
|
| + if (c != ";") {
|
| + _addToken(new ParseErrorToken(
|
| + "numeric-entity-without-semicolon"));
|
| + stream.unget(c);
|
| + }
|
| + return char;
|
| + }
|
| +
|
| + void consumeEntity({String allowedChar, bool fromAttribute: false}) {
|
| + // Initialise to the default output for when no entity is matched
|
| + var output = "&";
|
| +
|
| + var charStack = [stream.char()];
|
| + if (isWhitespace(charStack[0]) || charStack[0] == '<' || charStack[0] == '&'
|
| + || charStack[0] == EOF || allowedChar == charStack[0]) {
|
| + stream.unget(charStack[0]);
|
| + } else if (charStack[0] == "#") {
|
| + // Read the next character to see if it's hex or decimal
|
| + bool hex = false;
|
| + charStack.add(stream.char());
|
| + if (charStack.last == 'x' || charStack.last == 'X') {
|
| + hex = true;
|
| + charStack.add(stream.char());
|
| + }
|
| +
|
| + // charStack.last should be the first digit
|
| + if (hex && isHexDigit(charStack.last) ||
|
| + (!hex && isDigit(charStack.last))) {
|
| + // At least one digit found, so consume the whole number
|
| + stream.unget(charStack.last);
|
| + output = consumeNumberEntity(hex);
|
| + } else {
|
| + // No digits found
|
| + _addToken(new ParseErrorToken("expected-numeric-entity"));
|
| + stream.unget(charStack.removeLast());
|
| + output = "&${charStack.join()}";
|
| + }
|
| + } else {
|
| +      // At this point in the process we might have a named entity. Entities
|
| + // are stored in the global variable "entities".
|
| + //
|
| +      // Consume characters and compare these to a substring of the
|
| + // entity names in the list until the substring no longer matches.
|
| + var filteredEntityList = entitiesByFirstChar[charStack[0]];
|
| + if (filteredEntityList == null) filteredEntityList = const [];
|
| +
|
| + while (charStack.last != EOF) {
|
| + var name = charStack.join();
|
| + filteredEntityList = filteredEntityList.where(
|
| + (e) => e.startsWith(name)).toList();
|
| +
|
| + if (filteredEntityList.length == 0) {
|
| + break;
|
| + }
|
| + charStack.add(stream.char());
|
| + }
|
| +
|
| + // At this point we have a string that starts with some characters
|
| + // that may match an entity
|
| + String entityName = null;
|
| +
|
| + // Try to find the longest entity the string will match to take care
|
| + // of ¬i for instance.
|
| +
|
| + int entityLen;
|
| + for (entityLen = charStack.length - 1; entityLen > 1; entityLen--) {
|
| + var possibleEntityName = charStack.sublist(0, entityLen).join();
|
| + if (entities.containsKey(possibleEntityName)) {
|
| + entityName = possibleEntityName;
|
| + break;
|
| + }
|
| + }
|
| +
|
| + if (entityName != null) {
|
| + var lastChar = entityName[entityName.length - 1];
|
| + if (lastChar != ";") {
|
| + _addToken(new ParseErrorToken(
|
| + "named-entity-without-semicolon"));
|
| + }
|
| + if (lastChar != ";" && fromAttribute &&
|
| + (isLetterOrDigit(charStack[entityLen]) ||
|
| + charStack[entityLen] == '=')) {
|
| + stream.unget(charStack.removeLast());
|
| + output = "&${charStack.join()}";
|
| + } else {
|
| + output = entities[entityName];
|
| + stream.unget(charStack.removeLast());
|
| + output = '${output}${slice(charStack, entityLen).join()}';
|
| + }
|
| + } else {
|
| + _addToken(new ParseErrorToken("expected-named-entity"));
|
| + stream.unget(charStack.removeLast());
|
| + output = "&${charStack.join()}";
|
| + }
|
| + }
|
| + if (fromAttribute) {
|
| + _attributeValue = '$_attributeValue$output';
|
| + } else {
|
| + var token;
|
| + if (isWhitespace(output)) {
|
| + token = new SpaceCharactersToken(output);
|
| + } else {
|
| + token = new CharactersToken(output);
|
| + }
|
| + _addToken(token);
|
| + }
|
| + }
|
| +
|
| +  /** This method replaces the need for "entityInAttributeValueState". */
|
| +  void processEntityInAttribute(String allowedChar) {
|
| +    consumeEntity(allowedChar: allowedChar, fromAttribute: true); // consumeEntity appends the decoded text to _attributeValue
|
| +  }
|
| +
|
| + /**
|
| + * This method is a generic handler for emitting the tags. It also sets
|
| + * the state to "data" because that's what's needed after a token has been
|
| + * emitted.
|
| + */
|
| + void emitCurrentToken() {
|
| + var token = currentToken;
|
| + // Add token to the queue to be yielded
|
| + if (token is TagToken) {
|
| + if (lowercaseElementName) {
|
| + token.name = asciiUpper2Lower(token.name);
|
| + }
|
| + if (token is EndTagToken) {
|
| + if (_attributes != null) {
|
| + _addToken(new ParseErrorToken("attributes-in-end-tag"));
|
| + }
|
| + if (token.selfClosing) {
|
| + _addToken(new ParseErrorToken("this-closing-flag-on-end-tag"));
|
| + }
|
| + } else if (token is StartTagToken) {
|
| + // HTML5 specific normalizations to the token stream.
|
| + // Convert the list into a map where first key wins.
|
| + token.data = new LinkedHashMap<Object, String>();
|
| + if (_attributes != null) {
|
| + for (var attr in _attributes) {
|
| + token.data.putIfAbsent(attr.name, () => attr.value);
|
| + }
|
| + if (attributeSpans) token.attributeSpans = _attributes;
|
| + }
|
| + }
|
| + _attributes = null;
|
| + _attributeNames = null;
|
| + }
|
| + _addToken(token);
|
| + state = dataState;
|
| + }
|
| +
|
| + // Below are the various tokenizer states worked out.
|
| +
|
| + bool dataState() {
|
| + var data = stream.char();
|
| + if (data == "&") {
|
| + state = entityDataState;
|
| + } else if (data == "<") {
|
| + state = tagOpenState;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\u0000"));
|
| + } else if (data == EOF) {
|
| + // Tokenization ends.
|
| + return false;
|
| + } else if (isWhitespace(data)) {
|
| + // Directly after emitting a token you switch back to the "data
|
| + // state". At that point spaceCharacters are important so they are
|
| + // emitted separately.
|
| + _addToken(new SpaceCharactersToken(
|
| + '${data}${stream.charsUntil(spaceCharacters, true)}'));
|
| + // No need to update lastFourChars here, since the first space will
|
| + // have already been appended to lastFourChars and will have broken
|
| + // any <!-- or --> sequences
|
| + } else {
|
| + var chars = stream.charsUntil("&<\u0000");
|
| + _addToken(new CharactersToken('${data}${chars}'));
|
| + }
|
| + return true;
|
| + }
|
| +
|
| +  bool entityDataState() { // entered from dataState after "&": decode one character reference
|
| +    consumeEntity(); // queues the decoded characters, or a literal "&" when nothing matches
|
| +    state = dataState; // always return to the data state afterwards
|
| +    return true;
|
| +  }
|
| +
|
| + bool rcdataState() {
|
| + var data = stream.char();
|
| + if (data == "&") {
|
| + state = characterReferenceInRcdata;
|
| + } else if (data == "<") {
|
| + state = rcdataLessThanSignState;
|
| + } else if (data == EOF) {
|
| + // Tokenization ends.
|
| + return false;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\uFFFD"));
|
| + } else if (isWhitespace(data)) {
|
| + // Directly after emitting a token you switch back to the "data
|
| + // state". At that point spaceCharacters are important so they are
|
| + // emitted separately.
|
| + _addToken(new SpaceCharactersToken(
|
| + '${data}${stream.charsUntil(spaceCharacters, true)}'));
|
| + } else {
|
| + var chars = stream.charsUntil("&<");
|
| + _addToken(new CharactersToken('${data}${chars}'));
|
| + }
|
| + return true;
|
| + }
|
| +
|
| +  bool characterReferenceInRcdata() { // entered from rcdataState after "&": decode one character reference
|
| +    consumeEntity(); // queues the decoded characters, or a literal "&" when nothing matches
|
| +    state = rcdataState; // resume RCDATA processing afterwards
|
| +    return true;
|
| +  }
|
| +
|
| + bool rawtextState() {
|
| + var data = stream.char();
|
| + if (data == "<") {
|
| + state = rawtextLessThanSignState;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\uFFFD"));
|
| + } else if (data == EOF) {
|
| + // Tokenization ends.
|
| + return false;
|
| + } else {
|
| + var chars = stream.charsUntil("<\u0000");
|
| + _addToken(new CharactersToken("${data}${chars}"));
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataState() {
|
| + var data = stream.char();
|
| + if (data == "<") {
|
| + state = scriptDataLessThanSignState;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\uFFFD"));
|
| + } else if (data == EOF) {
|
| + // Tokenization ends.
|
| + return false;
|
| + } else {
|
| + var chars = stream.charsUntil("<\u0000");
|
| + _addToken(new CharactersToken("${data}${chars}"));
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool plaintextState() {
|
| + var data = stream.char();
|
| + if (data == EOF) {
|
| + // Tokenization ends.
|
| + return false;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\uFFFD"));
|
| + } else {
|
| + _addToken(new CharactersToken(
|
| + '${data}${stream.charsUntil("\u0000")}'));
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool tagOpenState() {
|
| + var data = stream.char();
|
| + if (data == "!") {
|
| + state = markupDeclarationOpenState;
|
| + } else if (data == "/") {
|
| + state = closeTagOpenState;
|
| + } else if (isLetter(data)) {
|
| + currentToken = new StartTagToken(data);
|
| + state = tagNameState;
|
| + } else if (data == ">") {
|
| + // XXX In theory it could be something besides a tag name. But
|
| + // do we really care?
|
| + _addToken(new ParseErrorToken(
|
| + "expected-tag-name-but-got-right-bracket"));
|
| + _addToken(new CharactersToken("<>"));
|
| + state = dataState;
|
| + } else if (data == "?") {
|
| + // XXX In theory it could be something besides a tag name. But
|
| + // do we really care?
|
| + _addToken(new ParseErrorToken(
|
| + "expected-tag-name-but-got-question-mark"));
|
| + stream.unget(data);
|
| + state = bogusCommentState;
|
| + } else {
|
| + // XXX
|
| + _addToken(new ParseErrorToken("expected-tag-name"));
|
| + _addToken(new CharactersToken("<"));
|
| + stream.unget(data);
|
| + state = dataState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool closeTagOpenState() {
|
| + var data = stream.char();
|
| + if (isLetter(data)) {
|
| + currentToken = new EndTagToken(data);
|
| + state = tagNameState;
|
| + } else if (data == ">") {
|
| + _addToken(new ParseErrorToken(
|
| + "expected-closing-tag-but-got-right-bracket"));
|
| + state = dataState;
|
| + } else if (data == EOF) {
|
| + _addToken(new ParseErrorToken(
|
| + "expected-closing-tag-but-got-eof"));
|
| + _addToken(new CharactersToken("</"));
|
| + state = dataState;
|
| + } else {
|
| + // XXX data can be _'_...
|
| + _addToken(new ParseErrorToken(
|
| + "expected-closing-tag-but-got-char", messageParams: {"data": data}));
|
| + stream.unget(data);
|
| + state = bogusCommentState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool tagNameState() {
|
| + var data = stream.char();
|
| + if (isWhitespace(data)) {
|
| + state = beforeAttributeNameState;
|
| + } else if (data == ">") {
|
| + emitCurrentToken();
|
| + } else if (data == EOF) {
|
| + _addToken(new ParseErrorToken("eof-in-tag-name"));
|
| + state = dataState;
|
| + } else if (data == "/") {
|
| + state = selfClosingStartTagState;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + currentTagToken.name = '${currentTagToken.name}\uFFFD';
|
| + } else {
|
| + currentTagToken.name = '${currentTagToken.name}$data';
|
| + // (Don't use charsUntil here, because tag names are
|
| + // very short and it's faster to not do anything fancy)
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool rcdataLessThanSignState() {
|
| + var data = stream.char();
|
| + if (data == "/") {
|
| + temporaryBuffer = "";
|
| + state = rcdataEndTagOpenState;
|
| + } else {
|
| + _addToken(new CharactersToken("<"));
|
| + stream.unget(data);
|
| + state = rcdataState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool rcdataEndTagOpenState() {
|
| + var data = stream.char();
|
| + if (isLetter(data)) {
|
| + temporaryBuffer = '${temporaryBuffer}$data';
|
| + state = rcdataEndTagNameState;
|
| + } else {
|
| + _addToken(new CharactersToken("</"));
|
| + stream.unget(data);
|
| + state = rcdataState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| +  bool _tokenIsAppropriate() { // the spec's "appropriate end tag token" check: temporaryBuffer vs. current tag name
|
| +    return currentToken is TagToken && // NOTE(review): assumes currentToken still holds the last start tag seen -- confirm
|
| +      currentTagToken.name.toLowerCase() == temporaryBuffer.toLowerCase();
|
| +  }
|
| +
|
| + bool rcdataEndTagNameState() {
|
| + var appropriate = _tokenIsAppropriate();
|
| + var data = stream.char();
|
| + if (isWhitespace(data) && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + state = beforeAttributeNameState;
|
| + } else if (data == "/" && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + state = selfClosingStartTagState;
|
| + } else if (data == ">" && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + emitCurrentToken();
|
| + state = dataState;
|
| + } else if (isLetter(data)) {
|
| + temporaryBuffer = '${temporaryBuffer}$data';
|
| + } else {
|
| + _addToken(new CharactersToken("</$temporaryBuffer"));
|
| + stream.unget(data);
|
| + state = rcdataState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool rawtextLessThanSignState() {
|
| + var data = stream.char();
|
| + if (data == "/") {
|
| + temporaryBuffer = "";
|
| + state = rawtextEndTagOpenState;
|
| + } else {
|
| + _addToken(new CharactersToken("<"));
|
| + stream.unget(data);
|
| + state = rawtextState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool rawtextEndTagOpenState() {
|
| + var data = stream.char();
|
| + if (isLetter(data)) {
|
| + temporaryBuffer = '${temporaryBuffer}$data';
|
| + state = rawtextEndTagNameState;
|
| + } else {
|
| + _addToken(new CharactersToken("</"));
|
| + stream.unget(data);
|
| + state = rawtextState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool rawtextEndTagNameState() {
|
| + var appropriate = _tokenIsAppropriate();
|
| + var data = stream.char();
|
| + if (isWhitespace(data) && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + state = beforeAttributeNameState;
|
| + } else if (data == "/" && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + state = selfClosingStartTagState;
|
| + } else if (data == ">" && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + emitCurrentToken();
|
| + state = dataState;
|
| + } else if (isLetter(data)) {
|
| + temporaryBuffer = '${temporaryBuffer}$data';
|
| + } else {
|
| + _addToken(new CharactersToken("</$temporaryBuffer"));
|
| + stream.unget(data);
|
| + state = rawtextState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataLessThanSignState() {
|
| + var data = stream.char();
|
| + if (data == "/") {
|
| + temporaryBuffer = "";
|
| + state = scriptDataEndTagOpenState;
|
| + } else if (data == "!") {
|
| + _addToken(new CharactersToken("<!"));
|
| + state = scriptDataEscapeStartState;
|
| + } else {
|
| + _addToken(new CharactersToken("<"));
|
| + stream.unget(data);
|
| + state = scriptDataState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataEndTagOpenState() {
|
| + var data = stream.char();
|
| + if (isLetter(data)) {
|
| + temporaryBuffer = '${temporaryBuffer}$data';
|
| + state = scriptDataEndTagNameState;
|
| + } else {
|
| + _addToken(new CharactersToken("</"));
|
| + stream.unget(data);
|
| + state = scriptDataState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataEndTagNameState() {
|
| + var appropriate = _tokenIsAppropriate();
|
| + var data = stream.char();
|
| + if (isWhitespace(data) && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + state = beforeAttributeNameState;
|
| + } else if (data == "/" && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + state = selfClosingStartTagState;
|
| + } else if (data == ">" && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + emitCurrentToken();
|
| + state = dataState;
|
| + } else if (isLetter(data)) {
|
| + temporaryBuffer = '${temporaryBuffer}$data';
|
| + } else {
|
| + _addToken(new CharactersToken("</$temporaryBuffer"));
|
| + stream.unget(data);
|
| + state = scriptDataState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataEscapeStartState() {
|
| + var data = stream.char();
|
| + if (data == "-") {
|
| + _addToken(new CharactersToken("-"));
|
| + state = scriptDataEscapeStartDashState;
|
| + } else {
|
| + stream.unget(data);
|
| + state = scriptDataState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataEscapeStartDashState() {
|
| + var data = stream.char();
|
| + if (data == "-") {
|
| + _addToken(new CharactersToken("-"));
|
| + state = scriptDataEscapedDashDashState;
|
| + } else {
|
| + stream.unget(data);
|
| + state = scriptDataState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataEscapedState() {
|
| + var data = stream.char();
|
| + if (data == "-") {
|
| + _addToken(new CharactersToken("-"));
|
| + state = scriptDataEscapedDashState;
|
| + } else if (data == "<") {
|
| + state = scriptDataEscapedLessThanSignState;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\uFFFD"));
|
| + } else if (data == EOF) {
|
| + state = dataState;
|
| + } else {
|
| + var chars = stream.charsUntil("<-\u0000");
|
| + _addToken(new CharactersToken("${data}${chars}"));
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataEscapedDashState() {
|
| + var data = stream.char();
|
| + if (data == "-") {
|
| + _addToken(new CharactersToken("-"));
|
| + state = scriptDataEscapedDashDashState;
|
| + } else if (data == "<") {
|
| + state = scriptDataEscapedLessThanSignState;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\uFFFD"));
|
| + state = scriptDataEscapedState;
|
| + } else if (data == EOF) {
|
| + state = dataState;
|
| + } else {
|
| + _addToken(new CharactersToken(data));
|
| + state = scriptDataEscapedState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataEscapedDashDashState() {
|
| + var data = stream.char();
|
| + if (data == "-") {
|
| + _addToken(new CharactersToken("-"));
|
| + } else if (data == "<") {
|
| + state = scriptDataEscapedLessThanSignState;
|
| + } else if (data == ">") {
|
| + _addToken(new CharactersToken(">"));
|
| + state = scriptDataState;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\uFFFD"));
|
| + state = scriptDataEscapedState;
|
| + } else if (data == EOF) {
|
| + state = dataState;
|
| + } else {
|
| + _addToken(new CharactersToken(data));
|
| + state = scriptDataEscapedState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataEscapedLessThanSignState() {
|
| + var data = stream.char();
|
| + if (data == "/") {
|
| + temporaryBuffer = "";
|
| + state = scriptDataEscapedEndTagOpenState;
|
| + } else if (isLetter(data)) {
|
| + _addToken(new CharactersToken("<$data"));
|
| + temporaryBuffer = data;
|
| + state = scriptDataDoubleEscapeStartState;
|
| + } else {
|
| + _addToken(new CharactersToken("<"));
|
| + stream.unget(data);
|
| + state = scriptDataEscapedState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataEscapedEndTagOpenState() {
|
| + var data = stream.char();
|
| + if (isLetter(data)) {
|
| + temporaryBuffer = data;
|
| + state = scriptDataEscapedEndTagNameState;
|
| + } else {
|
| + _addToken(new CharactersToken("</"));
|
| + stream.unget(data);
|
| + state = scriptDataEscapedState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataEscapedEndTagNameState() {
|
| + var appropriate = _tokenIsAppropriate();
|
| + var data = stream.char();
|
| + if (isWhitespace(data) && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + state = beforeAttributeNameState;
|
| + } else if (data == "/" && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + state = selfClosingStartTagState;
|
| + } else if (data == ">" && appropriate) {
|
| + currentToken = new EndTagToken(temporaryBuffer);
|
| + emitCurrentToken();
|
| + state = dataState;
|
| + } else if (isLetter(data)) {
|
| + temporaryBuffer = '${temporaryBuffer}$data';
|
| + } else {
|
| + _addToken(new CharactersToken("</$temporaryBuffer"));
|
| + stream.unget(data);
|
| + state = scriptDataEscapedState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataDoubleEscapeStartState() {
|
| + var data = stream.char();
|
| + if (isWhitespace(data) || data == "/" || data == ">") {
|
| + _addToken(new CharactersToken(data));
|
| + if (temporaryBuffer.toLowerCase() == "script") {
|
| + state = scriptDataDoubleEscapedState;
|
| + } else {
|
| + state = scriptDataEscapedState;
|
| + }
|
| + } else if (isLetter(data)) {
|
| + _addToken(new CharactersToken(data));
|
| + temporaryBuffer = '${temporaryBuffer}$data';
|
| + } else {
|
| + stream.unget(data);
|
| + state = scriptDataEscapedState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataDoubleEscapedState() {
|
| + var data = stream.char();
|
| + if (data == "-") {
|
| + _addToken(new CharactersToken("-"));
|
| + state = scriptDataDoubleEscapedDashState;
|
| + } else if (data == "<") {
|
| + _addToken(new CharactersToken("<"));
|
| + state = scriptDataDoubleEscapedLessThanSignState;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\uFFFD"));
|
| + } else if (data == EOF) {
|
| + _addToken(new ParseErrorToken("eof-in-script-in-script"));
|
| + state = dataState;
|
| + } else {
|
| + _addToken(new CharactersToken(data));
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataDoubleEscapedDashState() {
|
| + var data = stream.char();
|
| + if (data == "-") {
|
| + _addToken(new CharactersToken("-"));
|
| + state = scriptDataDoubleEscapedDashDashState;
|
| + } else if (data == "<") {
|
| + _addToken(new CharactersToken("<"));
|
| + state = scriptDataDoubleEscapedLessThanSignState;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\uFFFD"));
|
| + state = scriptDataDoubleEscapedState;
|
| + } else if (data == EOF) {
|
| + _addToken(new ParseErrorToken("eof-in-script-in-script"));
|
| + state = dataState;
|
| + } else {
|
| + _addToken(new CharactersToken(data));
|
| + state = scriptDataDoubleEscapedState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + // TODO(jmesserly): report bug in original code
|
| + // (was "Dash" instead of "DashDash")
|
| + bool scriptDataDoubleEscapedDashDashState() {
|
| + var data = stream.char();
|
| + if (data == "-") {
|
| + _addToken(new CharactersToken("-"));
|
| + } else if (data == "<") {
|
| + _addToken(new CharactersToken("<"));
|
| + state = scriptDataDoubleEscapedLessThanSignState;
|
| + } else if (data == ">") {
|
| + _addToken(new CharactersToken(">"));
|
| + state = scriptDataState;
|
| + } else if (data == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + _addToken(new CharactersToken("\uFFFD"));
|
| + state = scriptDataDoubleEscapedState;
|
| + } else if (data == EOF) {
|
| + _addToken(new ParseErrorToken("eof-in-script-in-script"));
|
| + state = dataState;
|
| + } else {
|
| + _addToken(new CharactersToken(data));
|
| + state = scriptDataDoubleEscapedState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + bool scriptDataDoubleEscapedLessThanSignState() {
|
| + var data = stream.char();
|
| + if (data == "/") {
|
| + _addToken(new CharactersToken("/"));
|
| + temporaryBuffer = "";
|
| + state = scriptDataDoubleEscapeEndState;
|
| + } else {
|
| + stream.unget(data);
|
| + state = scriptDataDoubleEscapedState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
  /// "Script data double escape end" state: accumulates letters after "</"
  /// into [temporaryBuffer]; if they spell "script" (case-insensitive) the
  /// tokenizer drops back to the single-escaped state.
  bool scriptDataDoubleEscapeEndState() {
    var data = stream.char();
    if (isWhitespace(data) || data == "/" || data == ">") {
      _addToken(new CharactersToken(data));
      if (temporaryBuffer.toLowerCase() == "script") {
        state = scriptDataEscapedState;
      } else {
        state = scriptDataDoubleEscapedState;
      }
    } else if (isLetter(data)) {
      _addToken(new CharactersToken(data));
      temporaryBuffer = '${temporaryBuffer}$data';
    } else {
      stream.unget(data);
      state = scriptDataDoubleEscapedState;
    }
    return true;
  }
|
| +
|
  /// "Before attribute name" state: skips whitespace, then either starts a
  /// new attribute, closes the tag, or reports an invalid character.
  bool beforeAttributeNameState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      // Consume the whole whitespace run in one call.
      stream.charsUntil(spaceCharacters, true);
    } else if (isLetter(data)) {
      _addAttribute(data);
      state = attributeNameState;
    } else if (data == ">") {
      emitCurrentToken();
    } else if (data == "/") {
      state = selfClosingStartTagState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("expected-attribute-name-but-got-eof"));
      state = dataState;
    } else if ("'\"=<".contains(data)) {
      // Parse error, but the character still starts an attribute name.
      _addToken(new ParseErrorToken("invalid-character-in-attribute-name"));
      _addAttribute(data);
      state = attributeNameState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      _addAttribute("\uFFFD");
      state = attributeNameState;
    } else {
      _addAttribute(data);
      state = attributeNameState;
    }
    return true;
  }
|
| +
|
  /// "Attribute name" state: accumulates the current attribute's name.
  ///
  /// When the name ends (`=`, whitespace, `/`, `>`, or EOF) the name is
  /// optionally lowercased and checked for duplicates; a `>` additionally
  /// emits the current tag token — but only after those checks run, which is
  /// why the emit is deferred via [emitToken].
  bool attributeNameState() {
    var data = stream.char();
    bool leavingThisState = true;
    bool emitToken = false;
    if (data == "=") {
      state = beforeAttributeValueState;
    } else if (isLetter(data)) {
      // Grab the whole run of letters at once for speed.
      _attributeName = '$_attributeName$data'
          '${stream.charsUntil(asciiLetters, true)}';
      leavingThisState = false;
    } else if (data == ">") {
      // XXX If we emit here the attributes are converted to a dict
      // without being checked and when the code below runs we error
      // because data is a dict not a list
      emitToken = true;
    } else if (isWhitespace(data)) {
      state = afterAttributeNameState;
    } else if (data == "/") {
      state = selfClosingStartTagState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      _attributeName = '${_attributeName}\uFFFD';
      leavingThisState = false;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-attribute-name"));
      state = dataState;
    } else if ("'\"<".contains(data)) {
      _addToken(new ParseErrorToken("invalid-character-in-attribute-name"));
      _attributeName = '$_attributeName$data';
      leavingThisState = false;
    } else {
      _attributeName = '$_attributeName$data';
      leavingThisState = false;
    }

    if (leavingThisState) {
      _markAttributeNameEnd(-1);

      // Attributes are not dropped at this stage. That happens when the
      // start tag token is emitted so values can still be safely appended
      // to attributes, but we do want to report the parse error in time.
      if (lowercaseAttrName) {
        _attributeName = asciiUpper2Lower(_attributeName);
      }
      if (_attributeNames == null) _attributeNames = new Set();
      if (_attributeNames.contains(_attributeName)) {
        _addToken(new ParseErrorToken("duplicate-attribute"));
      }
      _attributeNames.add(_attributeName);

      // XXX Fix for above XXX
      if (emitToken) {
        emitCurrentToken();
      }
    }
    return true;
  }
|
| +
|
  /// "After attribute name" state: past a name and its trailing whitespace;
  /// decides between a value (`=`), another attribute, or closing the tag.
  bool afterAttributeNameState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      stream.charsUntil(spaceCharacters, true);
    } else if (data == "=") {
      state = beforeAttributeValueState;
    } else if (data == ">") {
      emitCurrentToken();
    } else if (isLetter(data)) {
      // A new attribute starts without an "=" for the previous one.
      _addAttribute(data);
      state = attributeNameState;
    } else if (data == "/") {
      state = selfClosingStartTagState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      _addAttribute("\uFFFD");
      state = attributeNameState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("expected-end-of-tag-but-got-eof"));
      state = dataState;
    } else if ("'\"<".contains(data)) {
      _addToken(new ParseErrorToken("invalid-character-after-attribute-name"));
      _addAttribute(data);
      state = attributeNameState;
    } else {
      _addAttribute(data);
      state = attributeNameState;
    }
    return true;
  }
|
| +
|
  /// "Before attribute value" state: determines whether the value is
  /// double-quoted, single-quoted, or unquoted, and records the span start
  /// for attribute-span tracking.
  bool beforeAttributeValueState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      stream.charsUntil(spaceCharacters, true);
    } else if (data == "\"") {
      _markAttributeValueStart(0);
      state = attributeValueDoubleQuotedState;
    } else if (data == "&") {
      // An entity begins an unquoted value; push the "&" back so the
      // unquoted state processes it.
      state = attributeValueUnQuotedState;
      stream.unget(data);
      _markAttributeValueStart(0);
    } else if (data == "'") {
      _markAttributeValueStart(0);
      state = attributeValueSingleQuotedState;
    } else if (data == ">") {
      _addToken(new ParseErrorToken(
          "expected-attribute-value-but-got-right-bracket"));
      emitCurrentToken();
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      _markAttributeValueStart(-1);
      _attributeValue = '${_attributeValue}\uFFFD';
      state = attributeValueUnQuotedState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("expected-attribute-value-but-got-eof"));
      state = dataState;
    } else {
      _markAttributeValueStart(-1);
      _attributeValue = '$_attributeValue$data';
      state = attributeValueUnQuotedState;
    }
    return true;
  }
|
| +
|
  /// "Attribute value (double-quoted)" state: accumulates the value until the
  /// closing '"', expanding character references as they appear.
  bool attributeValueDoubleQuotedState() {
    var data = stream.char();
    if (data == "\"") {
      _markAttributeValueEnd(-1);
      _markAttributeEnd(0);
      state = afterAttributeValueState;
    } else if (data == "&") {
      // '"' is the extra allowed character for entities in this context.
      processEntityInAttribute('"');
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      _attributeValue = '${_attributeValue}\uFFFD';
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-attribute-value-double-quote"));
      _markAttributeValueEnd(-1);
      state = dataState;
    } else {
      // Bulk-consume everything up to the next '"' or '&'.
      _attributeValue = '$_attributeValue$data${stream.charsUntil("\"&")}';
    }
    return true;
  }
|
| +
|
  /// "Attribute value (single-quoted)" state: mirror of the double-quoted
  /// state with "'" as the terminator.
  bool attributeValueSingleQuotedState() {
    var data = stream.char();
    if (data == "'") {
      _markAttributeValueEnd(-1);
      _markAttributeEnd(0);
      state = afterAttributeValueState;
    } else if (data == "&") {
      processEntityInAttribute("'");
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      _attributeValue = '${_attributeValue}\uFFFD';
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-attribute-value-single-quote"));
      _markAttributeValueEnd(-1);
      state = dataState;
    } else {
      _attributeValue = '$_attributeValue$data${stream.charsUntil("\'&")}';
    }
    return true;
  }
|
| +
|
  /// "Attribute value (unquoted)" state: the value runs until whitespace or
  /// ">". Certain characters are parse errors but are still appended.
  bool attributeValueUnQuotedState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      _markAttributeValueEnd(-1);
      state = beforeAttributeNameState;
    } else if (data == "&") {
      // ">" is the extra allowed character for entities here.
      processEntityInAttribute(">");
    } else if (data == ">") {
      _markAttributeValueEnd(-1);
      emitCurrentToken();
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-attribute-value-no-quotes"));
      _markAttributeValueEnd(-1);
      state = dataState;
    } else if ('"\'=<`'.contains(data)) {
      _addToken(new ParseErrorToken(
          "unexpected-character-in-unquoted-attribute-value"));
      _attributeValue = '$_attributeValue$data';
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      _attributeValue = '${_attributeValue}\uFFFD';
    } else {
      // Bulk-consume up to the next special character.
      _attributeValue = '$_attributeValue$data'
          '${stream.charsUntil("&>\"\'=<`$spaceCharacters")}';
    }
    return true;
  }
|
| +
|
  /// "After attribute value (quoted)" state: only whitespace, ">", or "/"
  /// may legally follow a quoted value; anything else is reprocessed.
  bool afterAttributeValueState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      state = beforeAttributeNameState;
    } else if (data == ">") {
      emitCurrentToken();
    } else if (data == "/") {
      state = selfClosingStartTagState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("unexpected-EOF-after-attribute-value"));
      stream.unget(data);
      state = dataState;
    } else {
      _addToken(new ParseErrorToken(
          "unexpected-character-after-attribute-value"));
      // Reprocess the offending character as a new attribute start.
      stream.unget(data);
      state = beforeAttributeNameState;
    }
    return true;
  }
|
| +
|
  /// "Self-closing start tag" state: a "/" was seen; ">" marks the tag as
  /// self-closing and emits it, anything else is reprocessed.
  bool selfClosingStartTagState() {
    var data = stream.char();
    if (data == ">") {
      currentTagToken.selfClosing = true;
      emitCurrentToken();
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("unexpected-EOF-after-solidus-in-tag"));
      stream.unget(data);
      state = dataState;
    } else {
      // NOTE(review): "soldius" looks like a typo for "solidus" — but the
      // error-message table in constants.dart may use the same spelling as a
      // key, so verify both sides before correcting it.
      _addToken(new ParseErrorToken(
          "unexpected-character-after-soldius-in-tag"));
      stream.unget(data);
      state = beforeAttributeNameState;
    }
    return true;
  }
|
| +
|
| + bool bogusCommentState() {
|
| + // Make a new comment token and give it as value all the characters
|
| + // until the first > or EOF (charsUntil checks for EOF automatically)
|
| + // and emit it.
|
| + var data = stream.charsUntil(">");
|
| + data = data.replaceAll("\u0000", "\uFFFD");
|
| + _addToken(new CommentToken(data));
|
| +
|
| + // Eat the character directly after the bogus comment which is either a
|
| + // ">" or an EOF.
|
| + stream.char();
|
| + state = dataState;
|
| + return true;
|
| + }
|
| +
|
  /// "Markup declaration open" state: dispatches "<!". "--" begins a comment,
  /// "DOCTYPE" (case-insensitive) begins a doctype, and "[CDATA[" begins a
  /// CDATA section (only in foreign content). Anything else is reported and
  /// all consumed characters are pushed back for the bogus-comment state.
  bool markupDeclarationOpenState() {
    var charStack = [stream.char()];
    if (charStack.last == "-") {
      charStack.add(stream.char());
      if (charStack.last == "-") {
        currentToken = new CommentToken("");
        state = commentStartState;
        return true;
      }
    } else if (charStack.last == 'd' || charStack.last == 'D') {
      // Match the remaining "octype" letters case-insensitively.
      var matched = true;
      for (var expected in const ['oO', 'cC', 'tT', 'yY', 'pP', 'eE']) {
        var char = stream.char();
        charStack.add(char);
        if (char == EOF || !expected.contains(char)) {
          matched = false;
          break;
        }
      }
      if (matched) {
        currentToken = new DoctypeToken(correct: true);
        state = doctypeState;
        return true;
      }
    } else if (charStack.last == "[" &&
        parser != null && parser.tree.openElements.length > 0 &&
        parser.tree.openElements.last.namespace
            != parser.tree.defaultNamespace) {
      // "[CDATA[" is only honored when the current element is in a
      // non-default (foreign) namespace; this is why the tokenizer holds a
      // reference to the parser.
      var matched = true;
      for (var expected in const ["C", "D", "A", "T", "A", "["]) {
        charStack.add(stream.char());
        if (charStack.last != expected) {
          matched = false;
          break;
        }
      }
      if (matched) {
        state = cdataSectionState;
        return true;
      }
    }

    _addToken(new ParseErrorToken("expected-dashes-or-doctype"));

    // Push back everything we consumed so bogusCommentState sees it all.
    while (charStack.length > 0) {
      stream.unget(charStack.removeLast());
    }
    state = bogusCommentState;
    return true;
  }
|
| +
|
  /// "Comment start" state: just after "<!--"; an immediate ">" is an
  /// "incorrect comment" parse error that still emits the (empty) comment.
  bool commentStartState() {
    var data = stream.char();
    if (data == "-") {
      state = commentStartDashState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentStringToken.data = '${currentStringToken.data}\uFFFD';
    } else if (data == ">") {
      _addToken(new ParseErrorToken("incorrect-comment"));
      _addToken(currentToken);
      state = dataState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-comment"));
      _addToken(currentToken);
      state = dataState;
    } else {
      currentStringToken.data = '${currentStringToken.data}$data';
      state = commentState;
    }
    return true;
  }
|
| +
|
  /// "Comment start dash" state: "<!--" followed by a single "-"; the pending
  /// "-" is prepended to whatever comes next.
  bool commentStartDashState() {
    var data = stream.char();
    if (data == "-") {
      state = commentEndState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentStringToken.data = '${currentStringToken.data}-\uFFFD';
    } else if (data == ">") {
      _addToken(new ParseErrorToken("incorrect-comment"));
      _addToken(currentToken);
      state = dataState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-comment"));
      _addToken(currentToken);
      state = dataState;
    } else {
      currentStringToken.data = '${currentStringToken.data}-${data}';
      state = commentState;
    }
    return true;
  }
|
| +
|
  /// "Comment" state: accumulates comment text until a "-" (possible end) or
  /// EOF, bulk-consuming runs of ordinary characters.
  bool commentState() {
    var data = stream.char();
    if (data == "-") {
      state = commentEndDashState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentStringToken.data = '${currentStringToken.data}\uFFFD';
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-comment"));
      _addToken(currentToken);
      state = dataState;
    } else {
      currentStringToken.data = '${currentStringToken.data}$data'
          '${stream.charsUntil("-\u0000")}';
    }
    return true;
  }
|
| +
|
  /// "Comment end dash" state: one "-" seen inside a comment; a second "-"
  /// moves toward the comment end, otherwise the dash becomes comment text.
  bool commentEndDashState() {
    var data = stream.char();
    if (data == "-") {
      state = commentEndState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentStringToken.data = "${currentStringToken.data}-\uFFFD";
      state = commentState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-comment-end-dash"));
      _addToken(currentToken);
      state = dataState;
    } else {
      currentStringToken.data = "${currentStringToken.data}-${data}";
      state = commentState;
    }
    return true;
  }
|
| +
|
  /// "Comment end" state: "--" has been seen; ">" closes the comment, extra
  /// dashes extend it, and other characters re-enter the comment body.
  bool commentEndState() {
    var data = stream.char();
    if (data == ">") {
      _addToken(currentToken);
      state = dataState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentStringToken.data = '${currentStringToken.data}--\uFFFD';
      state = commentState;
    } else if (data == "!") {
      _addToken(new ParseErrorToken(
          "unexpected-bang-after-double-dash-in-comment"));
      state = commentEndBangState;
    } else if (data == "-") {
      // "---": keep one pending "--", fold the extra dash into the text.
      _addToken(new ParseErrorToken(
          "unexpected-dash-after-double-dash-in-comment"));
      currentStringToken.data = '${currentStringToken.data}$data';
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-comment-double-dash"));
      _addToken(currentToken);
      state = dataState;
    } else {
      // XXX
      _addToken(new ParseErrorToken("unexpected-char-in-comment"));
      currentStringToken.data = "${currentStringToken.data}--${data}";
      state = commentState;
    }
    return true;
  }
|
| +
|
  /// "Comment end bang" state: "--!" has been seen; ">" closes the comment,
  /// otherwise the literal "--!" is folded back into the comment text.
  bool commentEndBangState() {
    var data = stream.char();
    if (data == ">") {
      _addToken(currentToken);
      state = dataState;
    } else if (data == "-") {
      currentStringToken.data = '${currentStringToken.data}--!';
      state = commentEndDashState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentStringToken.data = '${currentStringToken.data}--!\uFFFD';
      state = commentState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-comment-end-bang-state"));
      _addToken(currentToken);
      state = dataState;
    } else {
      currentStringToken.data = "${currentStringToken.data}--!${data}";
      state = commentState;
    }
    return true;
  }
|
| +
|
  /// "DOCTYPE" state: expects whitespace after the DOCTYPE keyword; a missing
  /// space is a parse error but the character is still reprocessed.
  bool doctypeState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      state = beforeDoctypeNameState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken(
          "expected-doctype-name-but-got-eof"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      _addToken(new ParseErrorToken("need-space-after-doctype"));
      stream.unget(data);
      state = beforeDoctypeNameState;
    }
    return true;
  }
|
| +
|
  /// "Before DOCTYPE name" state: skips whitespace, then begins accumulating
  /// the doctype name (or flags the doctype as incorrect).
  bool beforeDoctypeNameState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      return true;
    } else if (data == ">") {
      _addToken(new ParseErrorToken(
          "expected-doctype-name-but-got-right-bracket"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentDoctypeToken.name = "\uFFFD";
      state = doctypeNameState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken(
          "expected-doctype-name-but-got-eof"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      currentDoctypeToken.name = data;
      state = doctypeNameState;
    }
    return true;
  }
|
| +
|
  /// "DOCTYPE name" state: accumulates the name; it is lowercased whenever
  /// the name ends (whitespace, ">", or EOF).
  bool doctypeNameState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      currentDoctypeToken.name = asciiUpper2Lower(currentDoctypeToken.name);
      state = afterDoctypeNameState;
    } else if (data == ">") {
      currentDoctypeToken.name = asciiUpper2Lower(currentDoctypeToken.name);
      _addToken(currentToken);
      state = dataState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentDoctypeToken.name = "${currentDoctypeToken.name}\uFFFD";
      state = doctypeNameState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype-name"));
      currentDoctypeToken.correct = false;
      currentDoctypeToken.name = asciiUpper2Lower(currentDoctypeToken.name);
      _addToken(currentToken);
      state = dataState;
    } else {
      currentDoctypeToken.name = '${currentDoctypeToken.name}$data';
    }
    return true;
  }
|
| +
|
  /// "After DOCTYPE name" state: looks for the case-insensitive keywords
  /// "PUBLIC" or "SYSTEM"; anything else sends the doctype to the bogus
  /// state with only the last character pushed back.
  bool afterDoctypeNameState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      return true;
    } else if (data == ">") {
      _addToken(currentToken);
      state = dataState;
    } else if (data == EOF) {
      currentDoctypeToken.correct = false;
      stream.unget(data);
      _addToken(new ParseErrorToken("eof-in-doctype"));
      _addToken(currentToken);
      state = dataState;
    } else {
      if (data == "p" || data == "P") {
        // TODO(jmesserly): would be nice to have a helper for this.
        // Match the rest of "PUBLIC" case-insensitively.
        var matched = true;
        for (var expected in const ["uU", "bB", "lL", "iI", "cC"]) {
          data = stream.char();
          if (data == EOF || !expected.contains(data)) {
            matched = false;
            break;
          }
        }
        if (matched) {
          state = afterDoctypePublicKeywordState;
          return true;
        }
      } else if (data == "s" || data == "S") {
        // Match the rest of "SYSTEM" case-insensitively.
        var matched = true;
        for (var expected in const ["yY", "sS", "tT", "eE", "mM"]) {
          data = stream.char();
          if (data == EOF || !expected.contains(data)) {
            matched = false;
            break;
          }
        }
        if (matched) {
          state = afterDoctypeSystemKeywordState;
          return true;
        }
      }

      // All the characters read before the current 'data' will be
      // [a-zA-Z], so they're garbage in the bogus doctype and can be
      // discarded; only the latest character might be '>' or EOF
      // and needs to be ungetted
      stream.unget(data);
      _addToken(new ParseErrorToken(
          "expected-space-or-right-bracket-in-doctype",
          messageParams: {"data": data}));
      currentDoctypeToken.correct = false;
      state = bogusDoctypeState;
    }
    return true;
  }
|
| +
|
  /// "After DOCTYPE public keyword" state: whitespace is expected; a quote
  /// here is a parse error but still starts the public identifier.
  bool afterDoctypePublicKeywordState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      state = beforeDoctypePublicIdentifierState;
    } else if (data == "'" || data == '"') {
      _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
      stream.unget(data);
      state = beforeDoctypePublicIdentifierState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      stream.unget(data);
      state = beforeDoctypePublicIdentifierState;
    }
    return true;
  }
|
| +
|
  /// "Before DOCTYPE public identifier" state: a quote starts the public id;
  /// anything else marks the doctype incorrect.
  bool beforeDoctypePublicIdentifierState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      return true;
    } else if (data == "\"") {
      currentDoctypeToken.publicId = "";
      state = doctypePublicIdentifierDoubleQuotedState;
    } else if (data == "'") {
      currentDoctypeToken.publicId = "";
      state = doctypePublicIdentifierSingleQuotedState;
    } else if (data == ">") {
      _addToken(new ParseErrorToken("unexpected-end-of-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
      currentDoctypeToken.correct = false;
      state = bogusDoctypeState;
    }
    return true;
  }
|
| +
|
  /// "DOCTYPE public identifier (double-quoted)" state: accumulates the
  /// public id until the closing '"'.
  bool doctypePublicIdentifierDoubleQuotedState() {
    var data = stream.char();
    if (data == '"') {
      state = afterDoctypePublicIdentifierState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentDoctypeToken.publicId = "${currentDoctypeToken.publicId}\uFFFD";
    } else if (data == ">") {
      // Premature ">" ends the doctype with an incorrect public id.
      _addToken(new ParseErrorToken("unexpected-end-of-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      currentDoctypeToken.publicId = '${currentDoctypeToken.publicId}$data';
    }
    return true;
  }
|
| +
|
  /// "DOCTYPE public identifier (single-quoted)" state: mirror of the
  /// double-quoted variant with "'" as the terminator.
  bool doctypePublicIdentifierSingleQuotedState() {
    var data = stream.char();
    if (data == "'") {
      state = afterDoctypePublicIdentifierState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentDoctypeToken.publicId = "${currentDoctypeToken.publicId}\uFFFD";
    } else if (data == ">") {
      _addToken(new ParseErrorToken("unexpected-end-of-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      currentDoctypeToken.publicId = '${currentDoctypeToken.publicId}$data';
    }
    return true;
  }
|
| +
|
  /// "After DOCTYPE public identifier" state: a system identifier may follow;
  /// starting it without intervening whitespace is a parse error.
  bool afterDoctypePublicIdentifierState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      state = betweenDoctypePublicAndSystemIdentifiersState;
    } else if (data == ">") {
      _addToken(currentToken);
      state = dataState;
    } else if (data == '"') {
      _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
      currentDoctypeToken.systemId = "";
      state = doctypeSystemIdentifierDoubleQuotedState;
    } else if (data == "'") {
      _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
      currentDoctypeToken.systemId = "";
      state = doctypeSystemIdentifierSingleQuotedState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
      currentDoctypeToken.correct = false;
      state = bogusDoctypeState;
    }
    return true;
  }
|
| +
|
  /// "Between DOCTYPE public and system identifiers" state: whitespace has
  /// separated the identifiers, so a quote here is legal (no parse error).
  bool betweenDoctypePublicAndSystemIdentifiersState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      return true;
    } else if (data == ">") {
      _addToken(currentToken);
      state = dataState;
    } else if (data == '"') {
      currentDoctypeToken.systemId = "";
      state = doctypeSystemIdentifierDoubleQuotedState;
    } else if (data == "'") {
      currentDoctypeToken.systemId = "";
      state = doctypeSystemIdentifierSingleQuotedState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
      currentDoctypeToken.correct = false;
      state = bogusDoctypeState;
    }
    return true;
  }
|
| +
|
  /// "After DOCTYPE system keyword" state: whitespace expected; a quote here
  /// is a parse error but still starts the system identifier.
  bool afterDoctypeSystemKeywordState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      state = beforeDoctypeSystemIdentifierState;
    } else if (data == "'" || data == '"') {
      _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
      stream.unget(data);
      state = beforeDoctypeSystemIdentifierState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      stream.unget(data);
      state = beforeDoctypeSystemIdentifierState;
    }
    return true;
  }
|
| +
|
  /// "Before DOCTYPE system identifier" state: a quote starts the system id;
  /// anything else marks the doctype incorrect.
  bool beforeDoctypeSystemIdentifierState() {
    var data = stream.char();
    if (isWhitespace(data)) {
      return true;
    } else if (data == "\"") {
      currentDoctypeToken.systemId = "";
      state = doctypeSystemIdentifierDoubleQuotedState;
    } else if (data == "'") {
      currentDoctypeToken.systemId = "";
      state = doctypeSystemIdentifierSingleQuotedState;
    } else if (data == ">") {
      _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
      currentDoctypeToken.correct = false;
      state = bogusDoctypeState;
    }
    return true;
  }
|
| +
|
  /// "DOCTYPE system identifier (double-quoted)" state: accumulates the
  /// system id until the closing '"'.
  bool doctypeSystemIdentifierDoubleQuotedState() {
    var data = stream.char();
    if (data == "\"") {
      state = afterDoctypeSystemIdentifierState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentDoctypeToken.systemId = "${currentDoctypeToken.systemId}\uFFFD";
    } else if (data == ">") {
      _addToken(new ParseErrorToken("unexpected-end-of-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      currentDoctypeToken.systemId = '${currentDoctypeToken.systemId}$data';
    }
    return true;
  }
|
| +
|
  /// "DOCTYPE system identifier (single-quoted)" state: mirror of the
  /// double-quoted variant with "'" as the terminator.
  bool doctypeSystemIdentifierSingleQuotedState() {
    var data = stream.char();
    if (data == "'") {
      state = afterDoctypeSystemIdentifierState;
    } else if (data == "\u0000") {
      _addToken(new ParseErrorToken("invalid-codepoint"));
      currentDoctypeToken.systemId = "${currentDoctypeToken.systemId}\uFFFD";
    } else if (data == ">") {
      _addToken(new ParseErrorToken("unexpected-end-of-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else if (data == EOF) {
      _addToken(new ParseErrorToken("eof-in-doctype"));
      currentDoctypeToken.correct = false;
      _addToken(currentToken);
      state = dataState;
    } else {
      currentDoctypeToken.systemId = '${currentDoctypeToken.systemId}$data';
    }
    return true;
  }
|
| +
|
| + bool afterDoctypeSystemIdentifierState() {
|
| + var data = stream.char();
|
| + if (isWhitespace(data)) {
|
| + return true;
|
| + } else if (data == ">") {
|
| + _addToken(currentToken);
|
| + state = dataState;
|
| + } else if (data == EOF) {
|
| + _addToken(new ParseErrorToken("eof-in-doctype"));
|
| + currentDoctypeToken.correct = false;
|
| + _addToken(currentToken);
|
| + state = dataState;
|
| + } else {
|
| + _addToken(new ParseErrorToken("unexpected-char-in-doctype"));
|
| + state = bogusDoctypeState;
|
| + }
|
| + return true;
|
| + }
|
| +
|
  /// "Bogus DOCTYPE" state: silently discards characters until ">" or EOF,
  /// then emits the (incorrect) doctype token.
  bool bogusDoctypeState() {
    var data = stream.char();
    if (data == ">") {
      _addToken(currentToken);
      state = dataState;
    } else if (data == EOF) {
      // XXX EMIT
      stream.unget(data);
      _addToken(currentToken);
      state = dataState;
    }
    return true;
  }
|
| +
|
| + bool cdataSectionState() {
|
| + var data = [];
|
| + int matchedEnd = 0;
|
| + while (true) {
|
| + var ch = stream.char();
|
| + if (ch == EOF) {
|
| + break;
|
| + }
|
| + // Deal with null here rather than in the parser
|
| + if (ch == "\u0000") {
|
| + _addToken(new ParseErrorToken("invalid-codepoint"));
|
| + ch = "\uFFFD";
|
| + }
|
| + data.add(ch);
|
| + // TODO(jmesserly): it'd be nice if we had an easier way to match the end,
|
| + // perhaps with a "peek" API.
|
| + if (ch == "]" && matchedEnd < 2) {
|
| + matchedEnd++;
|
| + } else if (ch == ">" && matchedEnd == 2) {
|
| + // Remove "]]>" from the end.
|
| + data.removeLast();
|
| + data.removeLast();
|
| + data.removeLast();
|
| + break;
|
| + } else {
|
| + matchedEnd = 0;
|
| + }
|
| + }
|
| +
|
| + if (data.length > 0) {
|
| + _addToken(new CharactersToken(data.join()));
|
| + }
|
| + state = dataState;
|
| + return true;
|
| + }
|
| +}
|
| +
|
|
|