| Index: mojo/public/dart/third_party/csslib/lib/src/tokenizer.dart
|
| diff --git a/mojo/public/dart/third_party/csslib/lib/src/tokenizer.dart b/mojo/public/dart/third_party/csslib/lib/src/tokenizer.dart
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..d423104bae92eb05e5a278fb249e603021de8486
|
| --- /dev/null
|
| +++ b/mojo/public/dart/third_party/csslib/lib/src/tokenizer.dart
|
| @@ -0,0 +1,456 @@
|
| +// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
|
| +// for details. All rights reserved. Use of this source code is governed by a
|
| +// BSD-style license that can be found in the LICENSE file.
|
| +
|
| +part of csslib.parser;
|
| +
|
| +class Tokenizer extends TokenizerBase {
|
| + /** Code unit of 'U', the upper-case lead of the unicode-range prefix U+. */
|
| + final UNICODE_U = 0x55 /*U*/;
|
| +
|
| + /** Code unit of 'u', the lower-case lead of the unicode-range prefix u+. */
|
| + final UNICODE_LOWER_U = 0x75 /*u*/;
|
| +
|
| + /** Code unit of '+', which follows U/u in a unicode-range prefix. */
|
| + final UNICODE_PLUS = 0x2B /*+*/;
|
| +
|
| + /** Code unit of '?', the wildcard digit of a unicode range (e.g. U+4??). */
|
| + final QUESTION_MARK = 0x3F /*?*/;
|
| +
|
| + /** Code units of the CDATA keyword, matched after `<![`. */
|
| + final List CDATA_NAME = 'CDATA'.codeUnits;
|
| +
|
| + /**
|
| +  * Creates a tokenizer; every argument is forwarded unchanged to the
|
| +  * [TokenizerBase] constructor.
|
| +  */
|
| + Tokenizer(SourceFile file, String text, bool skipWhitespace, [int index = 0])
|
| +     : super(file, text, skipWhitespace, index);
|
| +
|
| + /**
|
| +  * Scans the text from the current index and returns the next token.
|
| +  *
|
| +  * When [unicodeRange] is true the token is read as part of the body of a
|
| +  * unicode range (the text after `U+`): a minus sign is always returned as
|
| +  * MINUS, and a character that reaches the default case must be a hex digit
|
| +  * or a question mark, otherwise an ERROR token is returned.
|
| +  *
|
| +  * CDATA markers (`<![CDATA[` and `]]>`) are skipped and the token that
|
| +  * follows them is returned instead.
|
| +  */
|
| + Token next({unicodeRange: false}) {
|
| +   // Remember where this token starts.
|
| +   _startIndex = _index;
|
| +
|
| +   int ch = _nextChar();
|
| +   switch (ch) {
|
| +     case TokenChar.NEWLINE:
|
| +     case TokenChar.RETURN:
|
| +     case TokenChar.SPACE:
|
| +     case TokenChar.TAB:
|
| +       return finishWhitespace();
|
| +     case TokenChar.END_OF_FILE:
|
| +       return _finishToken(TokenKind.END_OF_FILE);
|
| +     case TokenChar.AT:
|
| +       int peekCh = _peekChar();
|
| +       if (TokenizerHelpers.isIdentifierStart(peekCh)) {
|
| +         // Saved so the scan can be rewound to just after the '@'.
|
| +         var oldIndex = _index;
|
| +         var oldStartIndex = _startIndex;
|
| +
|
| +         _startIndex = _index;
|
| +         ch = _nextChar();
|
| +         finishIdentifier();
|
| +
|
| +         // Is it a directive?
|
| +         int tokId = TokenKind.matchDirectives(
|
| +             _text, _startIndex, _index - _startIndex);
|
| +         if (tokId == -1) {
|
| +           // No, is it a margin directive?
|
| +           tokId = TokenKind.matchMarginDirectives(
|
| +               _text, _startIndex, _index - _startIndex);
|
| +         }
|
| +
|
| +         if (tokId != -1) {
|
| +           return _finishToken(tokId);
|
| +         } else {
|
| +           // Didn't find a CSS directive or margin directive so the @name is
|
| +           // probably the Less definition '@name: value_variable_definition'.
|
| +           _startIndex = oldStartIndex;
|
| +           _index = oldIndex;
|
| +         }
|
| +       }
|
| +       return _finishToken(TokenKind.AT);
|
| +     case TokenChar.DOT:
|
| +       int start = _startIndex; // Start where the dot started.
|
| +       if (maybeEatDigit()) {
|
| +         // Looks like a number: a dot followed by digit(s).
|
| +         Token number = finishNumber();
|
| +         if (number.kind == TokenKind.INTEGER) {
|
| +           // It's a number but it's preceded by a dot, so make it a double.
|
| +           _startIndex = start;
|
| +           return _finishToken(TokenKind.DOUBLE);
|
| +         } else {
|
| +           // Don't allow dot followed by a double (e.g, '..1').
|
| +           return _errorToken();
|
| +         }
|
| +       }
|
| +       // It's really a dot.
|
| +       return _finishToken(TokenKind.DOT);
|
| +     case TokenChar.LPAREN:
|
| +       return _finishToken(TokenKind.LPAREN);
|
| +     case TokenChar.RPAREN:
|
| +       return _finishToken(TokenKind.RPAREN);
|
| +     case TokenChar.LBRACE:
|
| +       return _finishToken(TokenKind.LBRACE);
|
| +     case TokenChar.RBRACE:
|
| +       return _finishToken(TokenKind.RBRACE);
|
| +     case TokenChar.LBRACK:
|
| +       return _finishToken(TokenKind.LBRACK);
|
| +     case TokenChar.RBRACK:
|
| +       if (_maybeEatChar(TokenChar.RBRACK) &&
|
| +           _maybeEatChar(TokenChar.GREATER)) {
|
| +         // ]]> closes a CDATA section; skip it and return what follows.
|
| +         return next();
|
| +       }
|
| +       return _finishToken(TokenKind.RBRACK);
|
| +     case TokenChar.HASH:
|
| +       return _finishToken(TokenKind.HASH);
|
| +     case TokenChar.PLUS:
|
| +       if (maybeEatDigit()) return finishNumber();
|
| +       return _finishToken(TokenKind.PLUS);
|
| +     case TokenChar.MINUS:
|
| +       if (inSelectorExpression || unicodeRange) {
|
| +         // If parsing in pseudo function expression then minus is an operator
|
| +         // not part of identifier e.g., interval value range (e.g. U+400-4ff)
|
| +         // or minus operator in selector expression.
|
| +         return _finishToken(TokenKind.MINUS);
|
| +       } else if (maybeEatDigit()) {
|
| +         return finishNumber();
|
| +       } else if (TokenizerHelpers.isIdentifierStart(ch)) {
|
| +         // NOTE(review): ch is the minus sign itself here; assuming
|
| +         // TokenChar.MINUS is 45, isIdentifierStart accepts it, so this
|
| +         // branch is always taken and a lone '-' becomes an identifier.
|
| +         return finishIdentifier();
|
| +       }
|
| +       return _finishToken(TokenKind.MINUS);
|
| +     case TokenChar.GREATER:
|
| +       return _finishToken(TokenKind.GREATER);
|
| +     case TokenChar.TILDE:
|
| +       if (_maybeEatChar(TokenChar.EQUALS)) {
|
| +         return _finishToken(TokenKind.INCLUDES); // ~=
|
| +       }
|
| +       return _finishToken(TokenKind.TILDE);
|
| +     case TokenChar.ASTERISK:
|
| +       if (_maybeEatChar(TokenChar.EQUALS)) {
|
| +         return _finishToken(TokenKind.SUBSTRING_MATCH); // *=
|
| +       }
|
| +       return _finishToken(TokenKind.ASTERISK);
|
| +     case TokenChar.AMPERSAND:
|
| +       return _finishToken(TokenKind.AMPERSAND);
|
| +     case TokenChar.NAMESPACE:
|
| +       if (_maybeEatChar(TokenChar.EQUALS)) {
|
| +         return _finishToken(TokenKind.DASH_MATCH); // |=
|
| +       }
|
| +       return _finishToken(TokenKind.NAMESPACE);
|
| +     case TokenChar.COLON:
|
| +       return _finishToken(TokenKind.COLON);
|
| +     case TokenChar.COMMA:
|
| +       return _finishToken(TokenKind.COMMA);
|
| +     case TokenChar.SEMICOLON:
|
| +       return _finishToken(TokenKind.SEMICOLON);
|
| +     case TokenChar.PERCENT:
|
| +       return _finishToken(TokenKind.PERCENT);
|
| +     case TokenChar.SINGLE_QUOTE:
|
| +       return _finishToken(TokenKind.SINGLE_QUOTE);
|
| +     case TokenChar.DOUBLE_QUOTE:
|
| +       return _finishToken(TokenKind.DOUBLE_QUOTE);
|
| +     case TokenChar.SLASH:
|
| +       if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment();
|
| +       return _finishToken(TokenKind.SLASH);
|
| +     case TokenChar.LESS: // <!--
|
| +       if (_maybeEatChar(TokenChar.BANG)) {
|
| +         if (_maybeEatChar(TokenChar.MINUS) &&
|
| +             _maybeEatChar(TokenChar.MINUS)) {
|
| +           return finishMultiLineComment();
|
| +         } else if (_maybeEatChar(TokenChar.LBRACK) &&
|
| +             _maybeEatChar(CDATA_NAME[0]) &&
|
| +             _maybeEatChar(CDATA_NAME[1]) &&
|
| +             _maybeEatChar(CDATA_NAME[2]) &&
|
| +             _maybeEatChar(CDATA_NAME[3]) &&
|
| +             _maybeEatChar(CDATA_NAME[4]) &&
|
| +             _maybeEatChar(TokenChar.LBRACK)) {
|
| +           // <![CDATA[ opens a CDATA section; skip it and return what follows.
|
| +           return next();
|
| +         }
|
| +       }
|
| +       return _finishToken(TokenKind.LESS);
|
| +     case TokenChar.EQUALS:
|
| +       return _finishToken(TokenKind.EQUALS);
|
| +     case TokenChar.CARET:
|
| +       if (_maybeEatChar(TokenChar.EQUALS)) {
|
| +         return _finishToken(TokenKind.PREFIX_MATCH); // ^=
|
| +       }
|
| +       return _finishToken(TokenKind.CARET);
|
| +     case TokenChar.DOLLAR:
|
| +       if (_maybeEatChar(TokenChar.EQUALS)) {
|
| +         return _finishToken(TokenKind.SUFFIX_MATCH); // $=
|
| +       }
|
| +       return _finishToken(TokenKind.DOLLAR);
|
| +     case TokenChar.BANG:
|
| +       Token tok = finishIdentifier();
|
| +       return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
|
| +     default:
|
| +       // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's
|
| +       // appropriate outside of a few specific places; certainly shouldn't
|
| +       // be parsed in selectors.
|
| +       if (!inSelector && ch == TokenChar.BACKSLASH) {
|
| +         return _finishToken(TokenKind.BACKSLASH);
|
| +       }
|
| +
|
| +       if (unicodeRange) {
|
| +         // Three types of unicode ranges:
|
| +         // - single code point (e.g. U+416)
|
| +         // - interval value range (e.g. U+400-4ff)
|
| +         // - range where trailing '?' characters imply 'any digit value'
|
| +         //   (e.g. U+4??)
|
| +         //
|
| +         // ch has already been consumed, so it is the first character of the
|
| +         // range and must itself be classified; looking only at the
|
| +         // characters after it would reject a lone digit such as the 0 in
|
| +         // U+0-7F and would accept a non-hex first character.
|
| +         if (TokenizerHelpers.isHexDigit(ch)) {
|
| +           var t = finishHexNumber();
|
| +           // Any question marks then it's a HEX_RANGE not HEX_NUMBER, so
|
| +           // return the token that spans the digits and the question marks.
|
| +           if (maybeEatQuestionMark()) return finishUnicodeRange();
|
| +           return t;
|
| +         } else if (ch == QUESTION_MARK) {
|
| +           // HEX_RANGE made only of question marks (e.g. U+???).
|
| +           return finishUnicodeRange();
|
| +         } else {
|
| +           return _errorToken();
|
| +         }
|
| +       } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) &&
|
| +           (_peekChar() == UNICODE_PLUS)) {
|
| +         // Unicode range: U+uNumber[-U+uNumber]
|
| +         // uNumber = 0..10FFFF
|
| +         _nextChar(); // Skip +
|
| +         _startIndex = _index; // Starts at the number
|
| +         return _finishToken(TokenKind.UNICODE_RANGE);
|
| +       } else if (varDef(ch)) {
|
| +         return _finishToken(TokenKind.VAR_DEFINITION);
|
| +       } else if (varUsage(ch)) {
|
| +         return _finishToken(TokenKind.VAR_USAGE);
|
| +       } else if (TokenizerHelpers.isIdentifierStart(ch)) {
|
| +         return finishIdentifier();
|
| +       } else if (TokenizerHelpers.isDigit(ch)) {
|
| +         return finishNumber();
|
| +       }
|
| +       return _errorToken();
|
| +   }
|
| + }
|
| +
|
| + /**
|
| +  * Returns true if [ch], the character just read, is the 'v' of the
|
| +  * variable-definition prefix `var-`, consuming the remaining `ar-`.
|
| +  *
|
| +  * When the prefix does not match, the index is restored so that a partial
|
| +  * match (just `a`, or `ar`) is not left consumed for whatever check runs
|
| +  * next.
|
| +  */
|
| + bool varDef(int ch) {
|
| +   if (ch != 'v'.codeUnitAt(0)) return false;
|
| +
|
| +   var savedIndex = _index;
|
| +   if (_maybeEatChar('a'.codeUnitAt(0)) &&
|
| +       _maybeEatChar('r'.codeUnitAt(0)) &&
|
| +       _maybeEatChar('-'.codeUnitAt(0))) {
|
| +     return true;
|
| +   }
|
| +
|
| +   // Undo whatever part of the prefix was eaten before the mismatch.
|
| +   _index = savedIndex;
|
| +   return false;
|
| + }
|
| +
|
| + /**
|
| +  * Returns true if [ch], the character just read, is a 'v' that is followed
|
| +  * by `ar` and then a '-'.
|
| +  *
|
| +  * The `a` and `r` are consumed as they match; the '-' is only peeked at.
|
| +  * Characters consumed before a mismatch are not put back.
|
| +  */
|
| + bool varUsage(int ch) {
|
| +   if (ch != 'v'.codeUnitAt(0)) return false;
|
| +   if (!_maybeEatChar('a'.codeUnitAt(0))) return false;
|
| +   if (!_maybeEatChar('r'.codeUnitAt(0))) return false;
|
| +   return _peekChar() == '-'.codeUnitAt(0);
|
| + }
|
| +
|
| + /**
|
| +  * Finishes the current token as an ERROR token.
|
| +  *
|
| +  * [message] is accepted but not used here.
|
| +  */
|
| + Token _errorToken([String message = null]) =>
|
| +     _finishToken(TokenKind.ERROR);
|
| +
|
| + /**
|
| +  * Returns the token kind for the text between the start index and the
|
| +  * current index.
|
| +  *
|
| +  * Outside of selectors and selector expressions the text is first matched
|
| +  * against the unit names. If no unit matched, the literal `!important`
|
| +  * maps to IMPORTANT. Anything else is an IDENTIFIER.
|
| +  */
|
| + int getIdentifierKind() {
|
| +   var kind = -1;
|
| +
|
| +   // Units are not matched in selectors or selector expressions.
|
| +   if (!(inSelectorExpression || inSelector)) {
|
| +     kind = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
|
| +   }
|
| +
|
| +   if (kind == -1 && _text.substring(_startIndex, _index) == '!important') {
|
| +     kind = TokenKind.IMPORTANT;
|
| +   }
|
| +
|
| +   return kind < 0 ? TokenKind.IDENTIFIER : kind;
|
| + }
|
| +
|
| + /**
|
| +  * Scans an identifier beginning at the start index and returns it as an
|
| +  * [IdentifierToken] whose kind comes from [getIdentifierKind].
|
| +  *
|
| +  * Characters before the index on entry were already consumed by the caller
|
| +  * and are accepted without validation. While [_inString] is set, backslash
|
| +  * escapes are decoded: up to six hex digits become a single character and
|
| +  * any other escaped character is taken literally. An escaped value above
|
| +  * U+10FFFF is replaced by U+FFFD, since it is not a valid code point and
|
| +  * would otherwise be rejected when the token text is built.
|
| +  */
|
| + Token finishIdentifier() {
|
| +   // Characters of the identifier, with escape sequences already decoded.
|
| +   var chars = [];
|
| +
|
| +   // backup so we can start with the first character
|
| +   int validateFrom = _index;
|
| +   _index = _startIndex;
|
| +   while (_index < _text.length) {
|
| +     int ch = _text.codeUnitAt(_index);
|
| +
|
| +     // If the previous character was "\" we need to escape.
|
| +     // http://www.w3.org/TR/CSS21/syndata.html#characters
|
| +     // if followed by hexadecimal digits, create the appropriate character.
|
| +     // otherwise, include the character in the identifier and don't treat it
|
| +     // specially.
|
| +     if (ch == 92 /*\*/ && _inString) {
|
| +       int startHex = ++_index;
|
| +       eatHexDigits(startHex + 6);
|
| +       if (_index != startHex) {
|
| +         // Parse the hex digits and add that character. Six hex digits can
|
| +         // encode values up to 0xFFFFFF, so clamp anything past the last
|
| +         // Unicode code point to the replacement character.
|
| +         int codePoint = int.parse('0x' + _text.substring(startHex, _index));
|
| +         chars.add(codePoint > 0x10FFFF ? 0xFFFD : codePoint);
|
| +
|
| +         if (_index == _text.length) break;
|
| +
|
| +         // if we stopped the hex because of a whitespace char, skip it
|
| +         ch = _text.codeUnitAt(_index);
|
| +         if (_index - startHex != 6 &&
|
| +             (ch == TokenChar.SPACE ||
|
| +                 ch == TokenChar.TAB ||
|
| +                 ch == TokenChar.RETURN ||
|
| +                 ch == TokenChar.NEWLINE)) {
|
| +           _index++;
|
| +         }
|
| +       } else {
|
| +         // not a digit, just add the next character literally
|
| +         if (_index == _text.length) break;
|
| +         chars.add(_text.codeUnitAt(_index++));
|
| +       }
|
| +     } else if (_index < validateFrom ||
|
| +         (inSelectorExpression
|
| +             ? TokenizerHelpers.isIdentifierPartExpr(ch)
|
| +             : TokenizerHelpers.isIdentifierPart(ch))) {
|
| +       chars.add(ch);
|
| +       _index++;
|
| +     } else {
|
| +       // Not an identifier or escaped character.
|
| +       break;
|
| +     }
|
| +   }
|
| +
|
| +   var span = _file.span(_startIndex, _index);
|
| +   var text = new String.fromCharCodes(chars);
|
| +
|
| +   return new IdentifierToken(text, getIdentifierKind(), span);
|
| + }
|
| +
|
| + /**
|
| +  * Consumes the rest of a number and returns its token.
|
| +  *
|
| +  * The result is a DOUBLE when the digits are followed by a '.' and at
|
| +  * least one more digit, and an INTEGER otherwise.
|
| +  */
|
| + Token finishNumber() {
|
| +   eatDigits();
|
| +
|
| +   final sawDot = _peekChar() == 46 /*.*/;
|
| +   if (sawDot) {
|
| +     _nextChar();
|
| +     if (!TokenizerHelpers.isDigit(_peekChar())) {
|
| +       // A dot that is not followed by a digit (as in 1.toString()) is not
|
| +       // part of the number, so step back over it.
|
| +       _index -= 1;
|
| +     } else {
|
| +       eatDigits();
|
| +       return _finishToken(TokenKind.DOUBLE);
|
| +     }
|
| +   }
|
| +
|
| +   return _finishToken(TokenKind.INTEGER);
|
| + }
|
| +
|
| + /**
|
| +  * Consumes the character at the current index if it is a decimal digit
|
| +  * and returns whether it did so.
|
| +  */
|
| + bool maybeEatDigit() {
|
| +   final atEnd = _index >= _text.length;
|
| +   if (atEnd || !TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
|
| +     return false;
|
| +   }
|
| +   _index += 1;
|
| +   return true;
|
| + }
|
| +
|
| + /**
|
| +  * Consumes every hex digit that follows the current index and finishes
|
| +  * the token as a HEX_INTEGER.
|
| +  */
|
| + Token finishHexNumber() {
|
| +   final limit = _text.length;
|
| +   eatHexDigits(limit);
|
| +   return _finishToken(TokenKind.HEX_INTEGER);
|
| + }
|
| +
|
| + /**
|
| +  * Advances the index over consecutive hex digits, stopping at the first
|
| +  * character that is not a hex digit or at [end] (capped at the length of
|
| +  * the text), whichever comes first.
|
| +  */
|
| + void eatHexDigits(int end) {
|
| +   final limit = math.min(end, _text.length);
|
| +   while (_index < limit &&
|
| +       TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
|
| +     _index += 1;
|
| +   }
|
| + }
|
| +
|
| + /**
|
| +  * Consumes the character at the current index if it is a hex digit and
|
| +  * returns whether it did so.
|
| +  */
|
| + bool maybeEatHexDigit() {
|
| +   final atEnd = _index >= _text.length;
|
| +   if (atEnd || !TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
|
| +     return false;
|
| +   }
|
| +   _index += 1;
|
| +   return true;
|
| + }
|
| +
|
| + /**
|
| +  * Consumes the character at the current index if it is a question mark
|
| +  * and returns whether it did so.
|
| +  */
|
| + bool maybeEatQuestionMark() {
|
| +   final atEnd = _index >= _text.length;
|
| +   if (atEnd || _text.codeUnitAt(_index) != QUESTION_MARK) return false;
|
| +   _index += 1;
|
| +   return true;
|
| + }
|
| +
|
| + /**
|
| +  * Advances the index over consecutive question marks, stopping at the
|
| +  * first other character or at the end of the text.
|
| +  */
|
| + void eatQuestionMarks() {
|
| +   while (_index < _text.length &&
|
| +       _text.codeUnitAt(_index) == QUESTION_MARK) {
|
| +     _index += 1;
|
| +   }
|
| + }
|
| +
|
| + /**
|
| +  * Consumes any question marks at the current index and finishes the token
|
| +  * as a HEX_RANGE.
|
| +  */
|
| + Token finishUnicodeRange() {
|
| +   // Take the whole run of wildcard digits before closing the token.
|
| +   eatQuestionMarks();
|
| +   return _finishToken(TokenKind.HEX_RANGE);
|
| + }
|
| +
|
| + /**
|
| +  * Consumes characters up to and including the end of a comment.
|
| +  *
|
| +  * Both terminators are recognised regardless of how the comment was
|
| +  * opened: `*``/` finishes a COMMENT token and `-->` finishes an
|
| +  * HTML_COMMENT token. While [_inString] is set the comment is dropped and
|
| +  * the token after it is returned instead. Reaching the end of the text
|
| +  * first gives an INCOMPLETE_COMMENT token.
|
| +  */
|
| + Token finishMultiLineComment() {
|
| +   while (true) {
|
| +     final ch = _nextChar();
|
| +     if (ch == 0) return _finishToken(TokenKind.INCOMPLETE_COMMENT);
|
| +
|
| +     // Kind of the comment that was just closed; null while still inside.
|
| +     var closedKind;
|
| +     if (ch == 42 /*'*'*/) {
|
| +       if (_maybeEatChar(47 /*'/'*/)) closedKind = TokenKind.COMMENT;
|
| +     } else if (ch == TokenChar.MINUS) {
|
| +       /* Check if close part of Comment Definition --> (CDC). */
|
| +       if (_maybeEatChar(TokenChar.MINUS) &&
|
| +           _maybeEatChar(TokenChar.GREATER)) {
|
| +         closedKind = TokenKind.HTML_COMMENT;
|
| +       }
|
| +     }
|
| +
|
| +     if (closedKind != null) {
|
| +       return _inString ? next() : _finishToken(closedKind);
|
| +     }
|
| +   }
|
| +   // Not reached: the loop above only exits by returning.
|
| +   return _errorToken();
|
| + }
|
| +}
|
| +
|
| +/** Character-classification predicates used by the tokenizer. */
|
| +class TokenizerHelpers {
|
| +  /** Whether [c] may begin an identifier; a minus sign is allowed. */
|
| +  static bool isIdentifierStart(int c) =>
|
| +      c == 45 /*-*/ || isIdentifierStartExpr(c);
|
| +
|
| +  /** Whether [c] is a decimal digit, '0' through '9'. */
|
| +  static bool isDigit(int c) => c >= 48 /*0*/ && c <= 57 /*9*/;
|
| +
|
| +  /** Whether [c] is a hexadecimal digit, in either letter case. */
|
| +  static bool isHexDigit(int c) {
|
| +    if (isDigit(c)) return true;
|
| +    if (c >= 97 /*a*/ && c <= 102 /*f*/) return true;
|
| +    return c >= 65 /*A*/ && c <= 70 /*F*/;
|
| +  }
|
| +
|
| +  /** Whether [c] may continue an identifier; a minus sign is allowed. */
|
| +  static bool isIdentifierPart(int c) =>
|
| +      c == 45 /*-*/ || isIdentifierPartExpr(c);
|
| +
|
| +  /**
|
| +   * Whether [c] may begin an identifier inside a pseudo function
|
| +   * expression, where identifiers can't have a minus sign.
|
| +   *
|
| +   * Accepts the letters a-z and A-Z, underscore, backslash (so that an
|
| +   * escaped character is allowed) and any value of U+00A0 or higher; see
|
| +   * http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier and
|
| +   * http://www.w3.org/TR/CSS21/syndata.html#characters
|
| +   */
|
| +  static bool isIdentifierStartExpr(int c) {
|
| +    if (c >= 97 /*a*/ && c <= 122 /*z*/) return true;
|
| +    if (c >= 65 /*A*/ && c <= 90 /*Z*/) return true;
|
| +    return c == 95 /*_*/ || c == 92 /*\*/ || c >= 0xA0;
|
| +  }
|
| +
|
| +  /**
|
| +   * Whether [c] may continue an identifier inside a pseudo function
|
| +   * expression, where identifiers can't have a minus sign.
|
| +   */
|
| +  static bool isIdentifierPartExpr(int c) =>
|
| +      isDigit(c) || isIdentifierStartExpr(c);
|
| +}
|
|
|