| Index: pkg/csslib/lib/src/tokenizer.dart
|
| diff --git a/pkg/csslib/lib/src/tokenizer.dart b/pkg/csslib/lib/src/tokenizer.dart
|
| index e3cdfdca171a6e761eca9ad1c16ddb8e6ab7b101..8d929abeb92a35c7420260f359183e0ecaed8fa8 100644
|
| --- a/pkg/csslib/lib/src/tokenizer.dart
|
| +++ b/pkg/csslib/lib/src/tokenizer.dart
|
| @@ -41,7 +41,7 @@ class Tokenizer extends TokenizerBase {
|
|
|
| _startIndex = _index;
|
| ch = _nextChar();
|
| - Token ident = this.finishIdentifier(ch);
|
| + Token ident = finishIdentifier();
|
|
|
| // Is it a directive?
|
| int tokId = TokenKind.matchDirectives(_text, _startIndex,
|
| @@ -101,7 +101,7 @@ class Tokenizer extends TokenizerBase {
|
| if (maybeEatDigit()) return finishNumber();
|
| return _finishToken(TokenKind.PLUS);
|
| case TokenChar.MINUS:
|
| - if (selectorExpression || unicodeRange) {
|
| + if (inSelectorExpression || unicodeRange) {
|
| // If parsing in pseudo function expression then minus is an operator
|
| // not part of identifier e.g., interval value range (e.g. U+400-4ff)
|
| // or minus operator in selector expression.
|
| @@ -109,7 +109,7 @@ class Tokenizer extends TokenizerBase {
|
| } else if (maybeEatDigit()) {
|
| return finishNumber();
|
| } else if (TokenizerHelpers.isIdentifierStart(ch)) {
|
| - return this.finishIdentifier(ch);
|
| + return finishIdentifier();
|
| }
|
| return _finishToken(TokenKind.MINUS);
|
| case TokenChar.GREATER:
|
| @@ -127,6 +127,9 @@ class Tokenizer extends TokenizerBase {
|
| case TokenChar.AMPERSAND:
|
| return _finishToken(TokenKind.AMPERSAND);
|
| case TokenChar.NAMESPACE:
|
| + if (_maybeEatChar(TokenChar.EQUALS)) {
|
| + return _finishToken(TokenKind.DASH_MATCH); // |=
|
| + }
|
| return _finishToken(TokenKind.NAMESPACE);
|
| case TokenChar.COLON:
|
| return _finishToken(TokenKind.COLON);
|
| @@ -162,11 +165,6 @@ class Tokenizer extends TokenizerBase {
|
| return _finishToken(TokenKind.LESS);
|
| case TokenChar.EQUALS:
|
| return _finishToken(TokenKind.EQUALS);
|
| - case TokenChar.OR:
|
| - if (_maybeEatChar(TokenChar.EQUALS)) {
|
| - return _finishToken(TokenKind.DASH_MATCH); // |=
|
| - }
|
| - return _finishToken(TokenKind.OR);
|
| case TokenChar.CARET:
|
| if (_maybeEatChar(TokenChar.EQUALS)) {
|
| return _finishToken(TokenKind.PREFIX_MATCH); // ^=
|
| @@ -178,11 +176,16 @@ class Tokenizer extends TokenizerBase {
|
| }
|
| return _finishToken(TokenKind.DOLLAR);
|
| case TokenChar.BANG:
|
| - Token tok = finishIdentifier(ch);
|
| + Token tok = finishIdentifier();
|
| return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
|
| - case TokenChar.BACKSLASH:
|
| - return _finishToken(TokenKind.BACKSLASH);
|
| default:
|
| + // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's
|
| + // appropriate outside of a few specific places; certainly shouldn't
|
| + // be parsed in selectors.
|
| + if (!inSelector && ch == TokenChar.BACKSLASH) {
|
| + return _finishToken(TokenKind.BACKSLASH);
|
| + }
|
| +
|
| if (unicodeRange) {
|
| // Three types of unicode ranges:
|
| // - single code point (e.g. U+416)
|
| @@ -212,7 +215,7 @@ class Tokenizer extends TokenizerBase {
|
| } else if (varUsage(ch)) {
|
| return _finishToken(TokenKind.VAR_USAGE);
|
| } else if (TokenizerHelpers.isIdentifierStart(ch)) {
|
| - return finishIdentifier(ch);
|
| + return finishIdentifier();
|
| } else if (TokenizerHelpers.isDigit(ch)) {
|
| return finishNumber();
|
| }
|
| @@ -236,7 +239,12 @@ class Tokenizer extends TokenizerBase {
|
|
|
| int getIdentifierKind() {
|
| // Is the identifier a unit type?
|
| - int tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
|
| + int tokId = -1;
|
| +
|
| + // Don't match units in selectors or selector expressions.
|
| + if (!inSelectorExpression && !inSelector) {
|
| + tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
|
| + }
|
| if (tokId == -1) {
|
| tokId = (_text.substring(_startIndex, _index) == '!important') ?
|
| TokenKind.IMPORTANT : -1;
|
| @@ -245,31 +253,59 @@ class Tokenizer extends TokenizerBase {
|
| return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
|
| }
|
|
|
| - // Need to override so CSS version of isIdentifierPart is used.
|
| - Token finishIdentifier(int ch) {
|
| + Token finishIdentifier() {
|
| + // If we encounter an escape sequence, remember it so we can post-process
|
| + // to unescape.
|
| + bool hasEscapedChars = false;
|
| + var chars = [];
|
| +
|
| + // backup so we can start with the first character
|
| + int validateFrom = _index;
|
| + _index = _startIndex;
|
| while (_index < _text.length) {
|
| - // If parsing in pseudo function expression then minus is an operator
|
| - // not part of identifier.
|
| - var isIdentifier = selectorExpression
|
| - ? TokenizerHelpers.isIdentifierPartExpr(_text.codeUnitAt(_index))
|
| - : TokenizerHelpers.isIdentifierPart(_text.codeUnitAt(_index));
|
| - if (!isIdentifier) {
|
| - break;
|
| + int ch = _text.codeUnitAt(_index);
|
| +
|
| + // If the previous character was "\" we need to escape. T
|
| + // http://www.w3.org/TR/CSS21/syndata.html#characters
|
| + // if followed by hexadecimal digits, create the appropriate character.
|
| + // otherwise, include the character in the identifier and don't treat it
|
| + // specially.
|
| + if (ch == 92/*\*/) {
|
| + int startHex = ++_index;
|
| + eatHexDigits(startHex + 6);
|
| + if (_index != startHex) {
|
| + // Parse the hex digits and add that character.
|
| + chars.add(int.parse('0x' + _text.substring(startHex, _index)));
|
| +
|
| + if (_index == _text.length) break;
|
| +
|
| + // if we stopped the hex because of a whitespace char, skip it
|
| + ch = _text.codeUnitAt(_index);
|
| + if (_index - startHex != 6 &&
|
| + (ch == TokenChar.SPACE || ch == TokenChar.TAB ||
|
| + ch == TokenChar.RETURN || ch == TokenChar.NEWLINE)) {
|
| + _index++;
|
| + }
|
| + } else {
|
| + // not a digit, just add the next character literally
|
| + if (_index == _text.length) break;
|
| + chars.add(_text.codeUnitAt(_index++));
|
| + }
|
| + } else if (_index < validateFrom || (inSelectorExpression
|
| + ? TokenizerHelpers.isIdentifierPartExpr(ch)
|
| + : TokenizerHelpers.isIdentifierPart(ch))) {
|
| + chars.add(ch);
|
| + _index++;
|
| } else {
|
| - _index += 1;
|
| + // Not an identifier or escaped character.
|
| + break;
|
| }
|
| }
|
|
|
| - int kind = getIdentifierKind();
|
| - if (kind == TokenKind.IDENTIFIER) {
|
| - return _finishToken(TokenKind.IDENTIFIER);
|
| - } else {
|
| - return _finishToken(kind);
|
| - }
|
| - }
|
| -
|
| - Token finishImportant() {
|
| + var span = _file.span(_startIndex, _index);
|
| + var text = new String.fromCharCodes(chars);
|
|
|
| + return new IdentifierToken(text, getIdentifierKind(), span);
|
| }
|
|
|
| Token finishNumber() {
|
| @@ -299,12 +335,13 @@ class Tokenizer extends TokenizerBase {
|
| }
|
|
|
| Token finishHexNumber() {
|
| - eatHexDigits();
|
| + eatHexDigits(_text.length);
|
| return _finishToken(TokenKind.HEX_INTEGER);
|
| }
|
|
|
| - void eatHexDigits() {
|
| - while (_index < _text.length) {
|
| + void eatHexDigits(int end) {
|
| + end = math.min(end, _text.length);
|
| + while (_index < end) {
|
| if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
|
| _index += 1;
|
| } else {
|
| @@ -399,7 +436,11 @@ class TokenizerHelpers {
|
| /** Pseudo function expressions identifiers can't have a minus sign. */
|
| static bool isIdentifierStartExpr(int c) {
|
| return ((c >= 97/*a*/ && c <= 122/*z*/) || (c >= 65/*A*/ && c <= 90/*Z*/) ||
|
| - c == 95/*_*/);
|
| + // Note: Unicode 10646 chars U+00A0 or higher are allowed, see:
|
| + // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
|
| + // http://www.w3.org/TR/CSS21/syndata.html#characters
|
| + // Also, escaped character should be allowed.
|
| + c == 95/*_*/ || c >= 0xA0 || c == 92/*\*/);
|
| }
|
|
|
| /** Pseudo function expressions identifiers can't have a minus sign. */
|
|
|