Index: pkg/csslib/lib/src/tokenizer.dart |
diff --git a/pkg/csslib/lib/src/tokenizer.dart b/pkg/csslib/lib/src/tokenizer.dart |
deleted file mode 100644 |
index 8d929abeb92a35c7420260f359183e0ecaed8fa8..0000000000000000000000000000000000000000 |
--- a/pkg/csslib/lib/src/tokenizer.dart |
+++ /dev/null |
@@ -1,450 +0,0 @@ |
-// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
-// for details. All rights reserved. Use of this source code is governed by a |
-// BSD-style license that can be found in the LICENSE file. |
- |
-part of csslib.parser; |
- |
-class Tokenizer extends TokenizerBase { |
- /** U+ prefix for unicode characters. */ |
- final UNICODE_U = 'U'.codeUnitAt(0); |
- final UNICODE_LOWER_U = 'u'.codeUnitAt(0); |
- final UNICODE_PLUS = '+'.codeUnitAt(0); |
- |
- final QUESTION_MARK = '?'.codeUnitAt(0); |
- |
- /** CDATA keyword. */ |
- final List CDATA_NAME = 'CDATA'.codeUnits; |
- |
- Tokenizer(SourceFile file, String text, bool skipWhitespace, |
- [int index = 0]) |
- : super(file, text, skipWhitespace, index); |
- |
- Token next({unicodeRange: false}) { |
- // keep track of our starting position |
- _startIndex = _index; |
- |
- int ch; |
- ch = _nextChar(); |
- switch (ch) { |
- case TokenChar.NEWLINE: |
- case TokenChar.RETURN: |
- case TokenChar.SPACE: |
- case TokenChar.TAB: |
- return finishWhitespace(); |
- case TokenChar.END_OF_FILE: |
- return _finishToken(TokenKind.END_OF_FILE); |
- case TokenChar.AT: |
- int peekCh = _peekChar(); |
- if (TokenizerHelpers.isIdentifierStart(peekCh)) { |
- var oldIndex = _index; |
- var oldStartIndex = _startIndex; |
- |
- _startIndex = _index; |
- ch = _nextChar(); |
- Token ident = finishIdentifier(); |
- |
- // Is it a directive? |
- int tokId = TokenKind.matchDirectives(_text, _startIndex, |
- _index - _startIndex); |
- if (tokId == -1) { |
- // No, is it a margin directive? |
- tokId = TokenKind.matchMarginDirectives(_text, _startIndex, |
- _index - _startIndex); |
- } |
- |
- if (tokId != -1) { |
- return _finishToken(tokId); |
- } else { |
- // Didn't find a CSS directive or margin directive so the @name is |
- // probably the Less definition '@name: value_variable_definition'. |
- _startIndex = oldStartIndex; |
- _index = oldIndex; |
- } |
- } |
- return _finishToken(TokenKind.AT); |
- case TokenChar.DOT: |
- int start = _startIndex; // Start where the dot started. |
- if (maybeEatDigit()) { |
- // looks like a number dot followed by digit(s). |
- Token number = finishNumber(); |
- if (number.kind == TokenKind.INTEGER) { |
- // It's a number but it's preceeded by a dot, so make it a double. |
- _startIndex = start; |
- return _finishToken(TokenKind.DOUBLE); |
- } else { |
- // Don't allow dot followed by a double (e.g, '..1'). |
- return _errorToken(); |
- } |
- } |
- // It's really a dot. |
- return _finishToken(TokenKind.DOT); |
- case TokenChar.LPAREN: |
- return _finishToken(TokenKind.LPAREN); |
- case TokenChar.RPAREN: |
- return _finishToken(TokenKind.RPAREN); |
- case TokenChar.LBRACE: |
- return _finishToken(TokenKind.LBRACE); |
- case TokenChar.RBRACE: |
- return _finishToken(TokenKind.RBRACE); |
- case TokenChar.LBRACK: |
- return _finishToken(TokenKind.LBRACK); |
- case TokenChar.RBRACK: |
- if (_maybeEatChar(TokenChar.RBRACK) && |
- _maybeEatChar(TokenChar.GREATER)) { |
- // ]]> |
- return next(); |
- } |
- return _finishToken(TokenKind.RBRACK); |
- case TokenChar.HASH: |
- return _finishToken(TokenKind.HASH); |
- case TokenChar.PLUS: |
- if (maybeEatDigit()) return finishNumber(); |
- return _finishToken(TokenKind.PLUS); |
- case TokenChar.MINUS: |
- if (inSelectorExpression || unicodeRange) { |
- // If parsing in pseudo function expression then minus is an operator |
- // not part of identifier e.g., interval value range (e.g. U+400-4ff) |
- // or minus operator in selector expression. |
- return _finishToken(TokenKind.MINUS); |
- } else if (maybeEatDigit()) { |
- return finishNumber(); |
- } else if (TokenizerHelpers.isIdentifierStart(ch)) { |
- return finishIdentifier(); |
- } |
- return _finishToken(TokenKind.MINUS); |
- case TokenChar.GREATER: |
- return _finishToken(TokenKind.GREATER); |
- case TokenChar.TILDE: |
- if (_maybeEatChar(TokenChar.EQUALS)) { |
- return _finishToken(TokenKind.INCLUDES); // ~= |
- } |
- return _finishToken(TokenKind.TILDE); |
- case TokenChar.ASTERISK: |
- if (_maybeEatChar(TokenChar.EQUALS)) { |
- return _finishToken(TokenKind.SUBSTRING_MATCH); // *= |
- } |
- return _finishToken(TokenKind.ASTERISK); |
- case TokenChar.AMPERSAND: |
- return _finishToken(TokenKind.AMPERSAND); |
- case TokenChar.NAMESPACE: |
- if (_maybeEatChar(TokenChar.EQUALS)) { |
- return _finishToken(TokenKind.DASH_MATCH); // |= |
- } |
- return _finishToken(TokenKind.NAMESPACE); |
- case TokenChar.COLON: |
- return _finishToken(TokenKind.COLON); |
- case TokenChar.COMMA: |
- return _finishToken(TokenKind.COMMA); |
- case TokenChar.SEMICOLON: |
- return _finishToken(TokenKind.SEMICOLON); |
- case TokenChar.PERCENT: |
- return _finishToken(TokenKind.PERCENT); |
- case TokenChar.SINGLE_QUOTE: |
- return _finishToken(TokenKind.SINGLE_QUOTE); |
- case TokenChar.DOUBLE_QUOTE: |
- return _finishToken(TokenKind.DOUBLE_QUOTE); |
- case TokenChar.SLASH: |
- if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment(); |
- return _finishToken(TokenKind.SLASH); |
- case TokenChar.LESS: // <!-- |
- if (_maybeEatChar(TokenChar.BANG)) { |
- if (_maybeEatChar(TokenChar.MINUS) && |
- _maybeEatChar(TokenChar.MINUS)) { |
- return finishMultiLineComment(); |
- } else if (_maybeEatChar(TokenChar.LBRACK) && |
- _maybeEatChar(CDATA_NAME[0]) && |
- _maybeEatChar(CDATA_NAME[1]) && |
- _maybeEatChar(CDATA_NAME[2]) && |
- _maybeEatChar(CDATA_NAME[3]) && |
- _maybeEatChar(CDATA_NAME[4]) && |
- _maybeEatChar(TokenChar.LBRACK)) { |
- // <![CDATA[ |
- return next(); |
- } |
- } |
- return _finishToken(TokenKind.LESS); |
- case TokenChar.EQUALS: |
- return _finishToken(TokenKind.EQUALS); |
- case TokenChar.CARET: |
- if (_maybeEatChar(TokenChar.EQUALS)) { |
- return _finishToken(TokenKind.PREFIX_MATCH); // ^= |
- } |
- return _finishToken(TokenKind.CARET); |
- case TokenChar.DOLLAR: |
- if (_maybeEatChar(TokenChar.EQUALS)) { |
- return _finishToken(TokenKind.SUFFIX_MATCH); // $= |
- } |
- return _finishToken(TokenKind.DOLLAR); |
- case TokenChar.BANG: |
- Token tok = finishIdentifier(); |
- return (tok == null) ? _finishToken(TokenKind.BANG) : tok; |
- default: |
- // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's |
- // appropriate outside of a few specific places; certainly shouldn't |
- // be parsed in selectors. |
- if (!inSelector && ch == TokenChar.BACKSLASH) { |
- return _finishToken(TokenKind.BACKSLASH); |
- } |
- |
- if (unicodeRange) { |
- // Three types of unicode ranges: |
- // - single code point (e.g. U+416) |
- // - interval value range (e.g. U+400-4ff) |
- // - range where trailing ‘?’ characters imply ‘any digit value’ |
- // (e.g. U+4??) |
- if (maybeEatHexDigit()) { |
- var t = finishHexNumber(); |
- // Any question marks then it's a HEX_RANGE not HEX_NUMBER. |
- if (maybeEatQuestionMark()) finishUnicodeRange(); |
- return t; |
- } else if (maybeEatQuestionMark()) { |
- // HEX_RANGE U+N??? |
- return finishUnicodeRange(); |
- } else { |
- return _errorToken(); |
- } |
- } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) && |
- (_peekChar() == UNICODE_PLUS)) { |
- // Unicode range: U+uNumber[-U+uNumber] |
- // uNumber = 0..10FFFF |
- _nextChar(); // Skip + |
- _startIndex = _index; // Starts at the number |
- return _finishToken(TokenKind.UNICODE_RANGE); |
- } else if (varDef(ch)) { |
- return _finishToken(TokenKind.VAR_DEFINITION); |
- } else if (varUsage(ch)) { |
- return _finishToken(TokenKind.VAR_USAGE); |
- } else if (TokenizerHelpers.isIdentifierStart(ch)) { |
- return finishIdentifier(); |
- } else if (TokenizerHelpers.isDigit(ch)) { |
- return finishNumber(); |
- } |
- return _errorToken(); |
- } |
- } |
- |
- bool varDef(int ch) { |
- return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) && |
- _maybeEatChar('r'.codeUnitAt(0)) && _maybeEatChar('-'.codeUnitAt(0)); |
- } |
- |
- bool varUsage(int ch) { |
- return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) && |
- _maybeEatChar('r'.codeUnitAt(0)) && (_peekChar() == '-'.codeUnitAt(0)); |
- } |
- |
- Token _errorToken([String message = null]) { |
- return _finishToken(TokenKind.ERROR); |
- } |
- |
- int getIdentifierKind() { |
- // Is the identifier a unit type? |
- int tokId = -1; |
- |
- // Don't match units in selectors or selector expressions. |
- if (!inSelectorExpression && !inSelector) { |
- tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex); |
- } |
- if (tokId == -1) { |
- tokId = (_text.substring(_startIndex, _index) == '!important') ? |
- TokenKind.IMPORTANT : -1; |
- } |
- |
- return tokId >= 0 ? tokId : TokenKind.IDENTIFIER; |
- } |
- |
- Token finishIdentifier() { |
- // If we encounter an escape sequence, remember it so we can post-process |
- // to unescape. |
- bool hasEscapedChars = false; |
- var chars = []; |
- |
- // backup so we can start with the first character |
- int validateFrom = _index; |
- _index = _startIndex; |
- while (_index < _text.length) { |
- int ch = _text.codeUnitAt(_index); |
- |
- // If the previous character was "\" we need to escape. T |
- // http://www.w3.org/TR/CSS21/syndata.html#characters |
- // if followed by hexadecimal digits, create the appropriate character. |
- // otherwise, include the character in the identifier and don't treat it |
- // specially. |
- if (ch == 92/*\*/) { |
- int startHex = ++_index; |
- eatHexDigits(startHex + 6); |
- if (_index != startHex) { |
- // Parse the hex digits and add that character. |
- chars.add(int.parse('0x' + _text.substring(startHex, _index))); |
- |
- if (_index == _text.length) break; |
- |
- // if we stopped the hex because of a whitespace char, skip it |
- ch = _text.codeUnitAt(_index); |
- if (_index - startHex != 6 && |
- (ch == TokenChar.SPACE || ch == TokenChar.TAB || |
- ch == TokenChar.RETURN || ch == TokenChar.NEWLINE)) { |
- _index++; |
- } |
- } else { |
- // not a digit, just add the next character literally |
- if (_index == _text.length) break; |
- chars.add(_text.codeUnitAt(_index++)); |
- } |
- } else if (_index < validateFrom || (inSelectorExpression |
- ? TokenizerHelpers.isIdentifierPartExpr(ch) |
- : TokenizerHelpers.isIdentifierPart(ch))) { |
- chars.add(ch); |
- _index++; |
- } else { |
- // Not an identifier or escaped character. |
- break; |
- } |
- } |
- |
- var span = _file.span(_startIndex, _index); |
- var text = new String.fromCharCodes(chars); |
- |
- return new IdentifierToken(text, getIdentifierKind(), span); |
- } |
- |
- Token finishNumber() { |
- eatDigits(); |
- |
- if (_peekChar() == 46/*.*/) { |
- // Handle the case of 1.toString(). |
- _nextChar(); |
- if (TokenizerHelpers.isDigit(_peekChar())) { |
- eatDigits(); |
- return _finishToken(TokenKind.DOUBLE); |
- } else { |
- _index -= 1; |
- } |
- } |
- |
- return _finishToken(TokenKind.INTEGER); |
- } |
- |
- bool maybeEatDigit() { |
- if (_index < _text.length |
- && TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) { |
- _index += 1; |
- return true; |
- } |
- return false; |
- } |
- |
- Token finishHexNumber() { |
- eatHexDigits(_text.length); |
- return _finishToken(TokenKind.HEX_INTEGER); |
- } |
- |
- void eatHexDigits(int end) { |
- end = math.min(end, _text.length); |
- while (_index < end) { |
- if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) { |
- _index += 1; |
- } else { |
- return; |
- } |
- } |
- } |
- |
- bool maybeEatHexDigit() { |
- if (_index < _text.length |
- && TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) { |
- _index += 1; |
- return true; |
- } |
- return false; |
- } |
- |
- bool maybeEatQuestionMark() { |
- if (_index < _text.length && |
- _text.codeUnitAt(_index) == QUESTION_MARK) { |
- _index += 1; |
- return true; |
- } |
- return false; |
- } |
- |
- void eatQuestionMarks() { |
- while (_index < _text.length) { |
- if (_text.codeUnitAt(_index) == QUESTION_MARK) { |
- _index += 1; |
- } else { |
- return; |
- } |
- } |
- } |
- |
- Token finishUnicodeRange() { |
- eatQuestionMarks(); |
- return _finishToken(TokenKind.HEX_RANGE); |
- } |
- |
- Token finishMultiLineComment() { |
- while (true) { |
- int ch = _nextChar(); |
- if (ch == 0) { |
- return _finishToken(TokenKind.INCOMPLETE_COMMENT); |
- } else if (ch == 42/*'*'*/) { |
- if (_maybeEatChar(47/*'/'*/)) { |
- if (_skipWhitespace) { |
- return next(); |
- } else { |
- return _finishToken(TokenKind.COMMENT); |
- } |
- } |
- } else if (ch == TokenChar.MINUS) { |
- /* Check if close part of Comment Definition --> (CDC). */ |
- if (_maybeEatChar(TokenChar.MINUS)) { |
- if (_maybeEatChar(TokenChar.GREATER)) { |
- if (_skipWhitespace) { |
- return next(); |
- } else { |
- return _finishToken(TokenKind.HTML_COMMENT); |
- } |
- } |
- } |
- } |
- } |
- return _errorToken(); |
- } |
- |
-} |
- |
-/** Static helper methods. */ |
-class TokenizerHelpers { |
- static bool isIdentifierStart(int c) { |
- return isIdentifierStartExpr(c) || c == 45 /*-*/; |
- } |
- |
- static bool isDigit(int c) { |
- return (c >= 48/*0*/ && c <= 57/*9*/); |
- } |
- |
- static bool isHexDigit(int c) { |
- return (isDigit(c) || (c >= 97/*a*/ && c <= 102/*f*/) |
- || (c >= 65/*A*/ && c <= 70/*F*/)); |
- } |
- |
- static bool isIdentifierPart(int c) { |
- return isIdentifierPartExpr(c) || c == 45 /*-*/; |
- } |
- |
- /** Pseudo function expressions identifiers can't have a minus sign. */ |
- static bool isIdentifierStartExpr(int c) { |
- return ((c >= 97/*a*/ && c <= 122/*z*/) || (c >= 65/*A*/ && c <= 90/*Z*/) || |
- // Note: Unicode 10646 chars U+00A0 or higher are allowed, see: |
- // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier |
- // http://www.w3.org/TR/CSS21/syndata.html#characters |
- // Also, escaped character should be allowed. |
- c == 95/*_*/ || c >= 0xA0 || c == 92/*\*/); |
- } |
- |
- /** Pseudo function expressions identifiers can't have a minus sign. */ |
- static bool isIdentifierPartExpr(int c) { |
- return (isIdentifierStartExpr(c) || isDigit(c)); |
- } |
-} |