Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(50)

Unified Diff: pkg/csslib/lib/src/tokenizer.dart

Issue 268623002: [html5lib] implement querySelector/querySelectorAll (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « pkg/csslib/lib/src/token.dart ('k') | pkg/csslib/lib/src/tokenizer_base.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: pkg/csslib/lib/src/tokenizer.dart
diff --git a/pkg/csslib/lib/src/tokenizer.dart b/pkg/csslib/lib/src/tokenizer.dart
index e3cdfdca171a6e761eca9ad1c16ddb8e6ab7b101..8d929abeb92a35c7420260f359183e0ecaed8fa8 100644
--- a/pkg/csslib/lib/src/tokenizer.dart
+++ b/pkg/csslib/lib/src/tokenizer.dart
@@ -41,7 +41,7 @@ class Tokenizer extends TokenizerBase {
_startIndex = _index;
ch = _nextChar();
- Token ident = this.finishIdentifier(ch);
+ Token ident = finishIdentifier();
// Is it a directive?
int tokId = TokenKind.matchDirectives(_text, _startIndex,
@@ -101,7 +101,7 @@ class Tokenizer extends TokenizerBase {
if (maybeEatDigit()) return finishNumber();
return _finishToken(TokenKind.PLUS);
case TokenChar.MINUS:
- if (selectorExpression || unicodeRange) {
+ if (inSelectorExpression || unicodeRange) {
// If parsing in pseudo function expression then minus is an operator
// not part of identifier e.g., interval value range (e.g. U+400-4ff)
// or minus operator in selector expression.
@@ -109,7 +109,7 @@ class Tokenizer extends TokenizerBase {
} else if (maybeEatDigit()) {
return finishNumber();
} else if (TokenizerHelpers.isIdentifierStart(ch)) {
- return this.finishIdentifier(ch);
+ return finishIdentifier();
}
return _finishToken(TokenKind.MINUS);
case TokenChar.GREATER:
@@ -127,6 +127,9 @@ class Tokenizer extends TokenizerBase {
case TokenChar.AMPERSAND:
return _finishToken(TokenKind.AMPERSAND);
case TokenChar.NAMESPACE:
+ if (_maybeEatChar(TokenChar.EQUALS)) {
+ return _finishToken(TokenKind.DASH_MATCH); // |=
+ }
return _finishToken(TokenKind.NAMESPACE);
case TokenChar.COLON:
return _finishToken(TokenKind.COLON);
@@ -162,11 +165,6 @@ class Tokenizer extends TokenizerBase {
return _finishToken(TokenKind.LESS);
case TokenChar.EQUALS:
return _finishToken(TokenKind.EQUALS);
- case TokenChar.OR:
- if (_maybeEatChar(TokenChar.EQUALS)) {
- return _finishToken(TokenKind.DASH_MATCH); // |=
- }
- return _finishToken(TokenKind.OR);
case TokenChar.CARET:
if (_maybeEatChar(TokenChar.EQUALS)) {
return _finishToken(TokenKind.PREFIX_MATCH); // ^=
@@ -178,11 +176,16 @@ class Tokenizer extends TokenizerBase {
}
return _finishToken(TokenKind.DOLLAR);
case TokenChar.BANG:
- Token tok = finishIdentifier(ch);
+ Token tok = finishIdentifier();
return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
- case TokenChar.BACKSLASH:
- return _finishToken(TokenKind.BACKSLASH);
default:
+ // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's
+ // appropriate outside of a few specific places; certainly shouldn't
+ // be parsed in selectors.
+ if (!inSelector && ch == TokenChar.BACKSLASH) {
+ return _finishToken(TokenKind.BACKSLASH);
+ }
+
if (unicodeRange) {
// Three types of unicode ranges:
// - single code point (e.g. U+416)
@@ -212,7 +215,7 @@ class Tokenizer extends TokenizerBase {
} else if (varUsage(ch)) {
return _finishToken(TokenKind.VAR_USAGE);
} else if (TokenizerHelpers.isIdentifierStart(ch)) {
- return finishIdentifier(ch);
+ return finishIdentifier();
} else if (TokenizerHelpers.isDigit(ch)) {
return finishNumber();
}
@@ -236,7 +239,12 @@ class Tokenizer extends TokenizerBase {
int getIdentifierKind() {
// Is the identifier a unit type?
- int tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
+ int tokId = -1;
+
+ // Don't match units in selectors or selector expressions.
+ if (!inSelectorExpression && !inSelector) {
+ tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
+ }
if (tokId == -1) {
tokId = (_text.substring(_startIndex, _index) == '!important') ?
TokenKind.IMPORTANT : -1;
@@ -245,31 +253,59 @@ class Tokenizer extends TokenizerBase {
return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
}
- // Need to override so CSS version of isIdentifierPart is used.
- Token finishIdentifier(int ch) {
+ Token finishIdentifier() {
+ // If we encounter an escape sequence, remember it so we can post-process
+ // to unescape.
+ bool hasEscapedChars = false;
+ var chars = [];
+
+ // backup so we can start with the first character
+ int validateFrom = _index;
+ _index = _startIndex;
while (_index < _text.length) {
- // If parsing in pseudo function expression then minus is an operator
- // not part of identifier.
- var isIdentifier = selectorExpression
- ? TokenizerHelpers.isIdentifierPartExpr(_text.codeUnitAt(_index))
- : TokenizerHelpers.isIdentifierPart(_text.codeUnitAt(_index));
- if (!isIdentifier) {
- break;
+ int ch = _text.codeUnitAt(_index);
+
+ // If the current character is "\" we need to process the escape. See:
+ // http://www.w3.org/TR/CSS21/syndata.html#characters
+ // if followed by hexadecimal digits, create the appropriate character.
+ // otherwise, include the character in the identifier and don't treat it
+ // specially.
+ if (ch == 92/*\*/) {
+ int startHex = ++_index;
+ eatHexDigits(startHex + 6);
+ if (_index != startHex) {
+ // Parse the hex digits and add that character.
+ chars.add(int.parse('0x' + _text.substring(startHex, _index)));
+
+ if (_index == _text.length) break;
+
+ // if we stopped the hex because of a whitespace char, skip it
+ ch = _text.codeUnitAt(_index);
+ if (_index - startHex != 6 &&
+ (ch == TokenChar.SPACE || ch == TokenChar.TAB ||
+ ch == TokenChar.RETURN || ch == TokenChar.NEWLINE)) {
+ _index++;
+ }
+ } else {
+ // not a digit, just add the next character literally
+ if (_index == _text.length) break;
+ chars.add(_text.codeUnitAt(_index++));
+ }
+ } else if (_index < validateFrom || (inSelectorExpression
+ ? TokenizerHelpers.isIdentifierPartExpr(ch)
+ : TokenizerHelpers.isIdentifierPart(ch))) {
+ chars.add(ch);
+ _index++;
} else {
- _index += 1;
+ // Not an identifier or escaped character.
+ break;
}
}
- int kind = getIdentifierKind();
- if (kind == TokenKind.IDENTIFIER) {
- return _finishToken(TokenKind.IDENTIFIER);
- } else {
- return _finishToken(kind);
- }
- }
-
- Token finishImportant() {
+ var span = _file.span(_startIndex, _index);
+ var text = new String.fromCharCodes(chars);
+ return new IdentifierToken(text, getIdentifierKind(), span);
}
Token finishNumber() {
@@ -299,12 +335,13 @@ class Tokenizer extends TokenizerBase {
}
Token finishHexNumber() {
- eatHexDigits();
+ eatHexDigits(_text.length);
return _finishToken(TokenKind.HEX_INTEGER);
}
- void eatHexDigits() {
- while (_index < _text.length) {
+ void eatHexDigits(int end) {
+ end = math.min(end, _text.length);
+ while (_index < end) {
if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
_index += 1;
} else {
@@ -399,7 +436,11 @@ class TokenizerHelpers {
/** Pseudo function expressions identifiers can't have a minus sign. */
static bool isIdentifierStartExpr(int c) {
return ((c >= 97/*a*/ && c <= 122/*z*/) || (c >= 65/*A*/ && c <= 90/*Z*/) ||
- c == 95/*_*/);
+ // Note: ISO 10646 characters U+00A0 and higher are allowed, see:
+ // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
+ // http://www.w3.org/TR/CSS21/syndata.html#characters
+ // Also, escaped characters should be allowed.
+ c == 95/*_*/ || c >= 0xA0 || c == 92/*\*/);
}
/** Pseudo function expressions identifiers can't have a minus sign. */
« no previous file with comments | « pkg/csslib/lib/src/token.dart ('k') | pkg/csslib/lib/src/tokenizer_base.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698