Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1143)

Unified Diff: pkg/csslib/lib/src/tokenizer.dart

Issue 814113004: Pull args, intl, logging, shelf, and source_maps out of the SDK. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Also csslib. Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « pkg/csslib/lib/src/token.dart ('k') | pkg/csslib/lib/src/tokenizer_base.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: pkg/csslib/lib/src/tokenizer.dart
diff --git a/pkg/csslib/lib/src/tokenizer.dart b/pkg/csslib/lib/src/tokenizer.dart
deleted file mode 100644
index 8d929abeb92a35c7420260f359183e0ecaed8fa8..0000000000000000000000000000000000000000
--- a/pkg/csslib/lib/src/tokenizer.dart
+++ /dev/null
@@ -1,450 +0,0 @@
-// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
-// for details. All rights reserved. Use of this source code is governed by a
-// BSD-style license that can be found in the LICENSE file.
-
-part of csslib.parser;
-
-/**
- * Tokenizer for CSS text.
- *
- * [next] scans one token per call. Besides plain CSS punctuation it
- * recognizes `@name` directives, `var-` prefixes, `U+` unicode ranges,
- * slash-star comments and the HTML comment / CDATA wrappers that may surround
- * a style sheet.
- */
-class Tokenizer extends TokenizerBase {
-  /** Code units of the `U+` / `u+` prefix that introduces a unicode range. */
-  final UNICODE_U = 'U'.codeUnitAt(0);
-  final UNICODE_LOWER_U = 'u'.codeUnitAt(0);
-  final UNICODE_PLUS = '+'.codeUnitAt(0);
-
-  /** Code unit of `?`, the wildcard digit of a unicode range (e.g. U+4??). */
-  final QUESTION_MARK = '?'.codeUnitAt(0);
-
-  /** Code units of the CDATA keyword, matched one at a time after `<![`. */
-  final List<int> CDATA_NAME = 'CDATA'.codeUnits;
-
-  /** Forwards every argument unchanged to the [TokenizerBase] constructor. */
-  Tokenizer(SourceFile file, String text, bool skipWhitespace,
-      [int index = 0])
-      : super(file, text, skipWhitespace, index);
-
-  /**
-   * Scans and returns the token that starts at the current position.
-   *
-   * Pass [unicodeRange] as true to scan the part of a unicode range that
-   * follows the `U+` prefix: a `-` is then always a MINUS token and any other
-   * run of hex digits and/or `?` becomes a HEX_INTEGER or HEX_RANGE token.
-   */
-  Token next({unicodeRange: false}) {
-    // Remember where this token begins; the finish* helpers build the token
-    // from _startIndex up to the current _index.
-    _startIndex = _index;
-
-    int ch;
-    ch = _nextChar();
-    switch (ch) {
-      case TokenChar.NEWLINE:
-      case TokenChar.RETURN:
-      case TokenChar.SPACE:
-      case TokenChar.TAB:
-        return finishWhitespace();
-      case TokenChar.END_OF_FILE:
-        return _finishToken(TokenKind.END_OF_FILE);
-      case TokenChar.AT:
-        int peekCh = _peekChar();
-        if (TokenizerHelpers.isIdentifierStart(peekCh)) {
-          // Saved so the scan can be rewound to just after the '@'.
-          var oldIndex = _index;
-          var oldStartIndex = _startIndex;
-
-          _startIndex = _index;
-          ch = _nextChar();
-          // Called only to advance _index past the name; the identifier token
-          // itself is not needed.
-          finishIdentifier();
-
-          // Is it a directive?
-          int tokId = TokenKind.matchDirectives(_text, _startIndex,
-              _index - _startIndex);
-          if (tokId == -1) {
-            // No, is it a margin directive?
-            tokId = TokenKind.matchMarginDirectives(_text, _startIndex,
-                _index - _startIndex);
-          }
-
-          if (tokId != -1) {
-            return _finishToken(tokId);
-          } else {
-            // Didn't find a CSS directive or margin directive so the @name is
-            // probably the Less definition '@name: value_variable_definition'.
-            // Rewind so that only the '@' is consumed.
-            _startIndex = oldStartIndex;
-            _index = oldIndex;
-          }
-        }
-        return _finishToken(TokenKind.AT);
-      case TokenChar.DOT:
-        int start = _startIndex; // Start where the dot started.
-        if (maybeEatDigit()) {
-          // Looks like a number: dot followed by digit(s).
-          Token number = finishNumber();
-          if (number.kind == TokenKind.INTEGER) {
-            // It's a number but it's preceded by a dot, so make it a double.
-            _startIndex = start;
-            return _finishToken(TokenKind.DOUBLE);
-          } else {
-            // Don't allow a dot followed by a double (e.g. '.1.1').
-            return _errorToken();
-          }
-        }
-        // It's really a dot.
-        return _finishToken(TokenKind.DOT);
-      case TokenChar.LPAREN:
-        return _finishToken(TokenKind.LPAREN);
-      case TokenChar.RPAREN:
-        return _finishToken(TokenKind.RPAREN);
-      case TokenChar.LBRACE:
-        return _finishToken(TokenKind.LBRACE);
-      case TokenChar.RBRACE:
-        return _finishToken(TokenKind.RBRACE);
-      case TokenChar.LBRACK:
-        return _finishToken(TokenKind.LBRACK);
-      case TokenChar.RBRACK:
-        if (_maybeEatChar(TokenChar.RBRACK) &&
-            _maybeEatChar(TokenChar.GREATER)) {
-          // ]]> closes a CDATA section; skip it and scan the next token.
-          return next();
-        }
-        return _finishToken(TokenKind.RBRACK);
-      case TokenChar.HASH:
-        return _finishToken(TokenKind.HASH);
-      case TokenChar.PLUS:
-        if (maybeEatDigit()) return finishNumber();
-        return _finishToken(TokenKind.PLUS);
-      case TokenChar.MINUS:
-        if (inSelectorExpression || unicodeRange) {
-          // If parsing in pseudo function expression then minus is an operator
-          // not part of identifier e.g., interval value range (e.g. U+400-4ff)
-          // or minus operator in selector expression.
-          return _finishToken(TokenKind.MINUS);
-        } else if (maybeEatDigit()) {
-          return finishNumber();
-        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
-          // NOTE(review): isIdentifierStart accepts code 45 ('-'); if
-          // TokenChar.MINUS is 45 this branch is always taken - confirm.
-          return finishIdentifier();
-        }
-        return _finishToken(TokenKind.MINUS);
-      case TokenChar.GREATER:
-        return _finishToken(TokenKind.GREATER);
-      case TokenChar.TILDE:
-        if (_maybeEatChar(TokenChar.EQUALS)) {
-          return _finishToken(TokenKind.INCLUDES); // ~=
-        }
-        return _finishToken(TokenKind.TILDE);
-      case TokenChar.ASTERISK:
-        if (_maybeEatChar(TokenChar.EQUALS)) {
-          return _finishToken(TokenKind.SUBSTRING_MATCH); // *=
-        }
-        return _finishToken(TokenKind.ASTERISK);
-      case TokenChar.AMPERSAND:
-        return _finishToken(TokenKind.AMPERSAND);
-      case TokenChar.NAMESPACE:
-        if (_maybeEatChar(TokenChar.EQUALS)) {
-          return _finishToken(TokenKind.DASH_MATCH); // |=
-        }
-        return _finishToken(TokenKind.NAMESPACE);
-      case TokenChar.COLON:
-        return _finishToken(TokenKind.COLON);
-      case TokenChar.COMMA:
-        return _finishToken(TokenKind.COMMA);
-      case TokenChar.SEMICOLON:
-        return _finishToken(TokenKind.SEMICOLON);
-      case TokenChar.PERCENT:
-        return _finishToken(TokenKind.PERCENT);
-      case TokenChar.SINGLE_QUOTE:
-        return _finishToken(TokenKind.SINGLE_QUOTE);
-      case TokenChar.DOUBLE_QUOTE:
-        return _finishToken(TokenKind.DOUBLE_QUOTE);
-      case TokenChar.SLASH:
-        if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment();
-        return _finishToken(TokenKind.SLASH);
-      case TokenChar.LESS: // <!--
-        if (_maybeEatChar(TokenChar.BANG)) {
-          if (_maybeEatChar(TokenChar.MINUS) &&
-              _maybeEatChar(TokenChar.MINUS)) {
-            return finishMultiLineComment();
-          } else if (_maybeEatChar(TokenChar.LBRACK) &&
-              _maybeEatChar(CDATA_NAME[0]) &&
-              _maybeEatChar(CDATA_NAME[1]) &&
-              _maybeEatChar(CDATA_NAME[2]) &&
-              _maybeEatChar(CDATA_NAME[3]) &&
-              _maybeEatChar(CDATA_NAME[4]) &&
-              _maybeEatChar(TokenChar.LBRACK)) {
-            // <![CDATA[ opens a CDATA section; skip it and scan the next
-            // token.
-            return next();
-          }
-        }
-        return _finishToken(TokenKind.LESS);
-      case TokenChar.EQUALS:
-        return _finishToken(TokenKind.EQUALS);
-      case TokenChar.CARET:
-        if (_maybeEatChar(TokenChar.EQUALS)) {
-          return _finishToken(TokenKind.PREFIX_MATCH); // ^=
-        }
-        return _finishToken(TokenKind.CARET);
-      case TokenChar.DOLLAR:
-        if (_maybeEatChar(TokenChar.EQUALS)) {
-          return _finishToken(TokenKind.SUFFIX_MATCH); // $=
-        }
-        return _finishToken(TokenKind.DOLLAR);
-      case TokenChar.BANG:
-        // NOTE(review): finishIdentifier() in this class never returns null,
-        // so the BANG fallback looks unreachable unless a subclass overrides
-        // it - confirm.
-        Token tok = finishIdentifier();
-        return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
-      default:
-        // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's
-        // appropriate outside of a few specific places; certainly shouldn't
-        // be parsed in selectors.
-        if (!inSelector && ch == TokenChar.BACKSLASH) {
-          return _finishToken(TokenKind.BACKSLASH);
-        }
-
-        if (unicodeRange) {
-          // Three types of unicode ranges:
-          // - single code point (e.g. U+416)
-          // - interval value range (e.g. U+400-4ff)
-          // - range where trailing '?' characters imply 'any digit value'
-          //   (e.g. U+4??)
-          if (maybeEatHexDigit()) {
-            var hexToken = finishHexNumber();
-            // Any question marks then it's a HEX_RANGE not HEX_NUMBER, so
-            // return the range token, which also spans the question marks.
-            if (maybeEatQuestionMark()) return finishUnicodeRange();
-            return hexToken;
-          } else if (maybeEatQuestionMark()) {
-            // HEX_RANGE U+N???
-            return finishUnicodeRange();
-          } else {
-            return _errorToken();
-          }
-        } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) &&
-            (_peekChar() == UNICODE_PLUS)) {
-          // Unicode range: U+uNumber[-U+uNumber]
-          // uNumber = 0..10FFFF
-          _nextChar(); // Skip +
-          _startIndex = _index; // Starts at the number
-          return _finishToken(TokenKind.UNICODE_RANGE);
-        } else if (varDef(ch)) {
-          return _finishToken(TokenKind.VAR_DEFINITION);
-        } else if (varUsage(ch)) {
-          // NOTE(review): varDef() above has already consumed any "ar" that
-          // followed the 'v', so this branch looks unreachable - confirm
-          // against _maybeEatChar in TokenizerBase.
-          return _finishToken(TokenKind.VAR_USAGE);
-        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
-          return finishIdentifier();
-        } else if (TokenizerHelpers.isDigit(ch)) {
-          return finishNumber();
-        }
-        return _errorToken();
-    }
-  }
-
-  /**
-   * Whether [ch] is 'v' and the following characters are "ar-".
-   *
-   * The characters that match are consumed via _maybeEatChar, including those
-   * of a partial match.
-   */
-  bool varDef(int ch) {
-    return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) &&
-        _maybeEatChar('r'.codeUnitAt(0)) && _maybeEatChar('-'.codeUnitAt(0));
-  }
-
-  /**
-   * Whether [ch] is 'v', the following characters are "ar" and the character
-   * after those is '-'. The '-' is only peeked at, not consumed.
-   */
-  bool varUsage(int ch) {
-    return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) &&
-        _maybeEatChar('r'.codeUnitAt(0)) && (_peekChar() == '-'.codeUnitAt(0));
-  }
-
-  /**
-   * Finishes the current token as an ERROR token.
-   *
-   * [message] is accepted for interface compatibility but is not used here.
-   */
-  Token _errorToken([String message]) {
-    return _finishToken(TokenKind.ERROR);
-  }
-
-  /**
-   * Classifies the text between _startIndex and _index.
-   *
-   * Returns the unit kind found by TokenKind.matchUnits (only tried outside of
-   * selectors and selector expressions), TokenKind.IMPORTANT for the exact
-   * text '!important', and TokenKind.IDENTIFIER otherwise.
-   */
-  int getIdentifierKind() {
-    // Is the identifier a unit type?
-    int tokId = -1;
-
-    // Don't match units in selectors or selector expressions.
-    if (!inSelectorExpression && !inSelector) {
-      tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
-    }
-    if (tokId == -1 &&
-        _text.substring(_startIndex, _index) == '!important') {
-      tokId = TokenKind.IMPORTANT;
-    }
-
-    return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
-  }
-
-  /**
-   * Scans an identifier that begins at _startIndex and returns it as an
-   * [IdentifierToken] whose text has the backslash escapes resolved.
-   *
-   * Characters before the current _index were already accepted by the caller
-   * and are taken as-is; later characters must be identifier parts.
-   */
-  Token finishIdentifier() {
-    var chars = <int>[];
-
-    // Back up so we can start with the first character; everything before
-    // validateFrom is collected without being validated again.
-    int validateFrom = _index;
-    _index = _startIndex;
-    while (_index < _text.length) {
-      int ch = _text.codeUnitAt(_index);
-
-      // A backslash starts an escape, see
-      // http://www.w3.org/TR/CSS21/syndata.html#characters
-      // If followed by hexadecimal digits, create the appropriate character.
-      // Otherwise, include the next character in the identifier and don't
-      // treat it specially.
-      if (ch == 92) {
-        int startHex = ++_index;
-        eatHexDigits(startHex + 6); // An escape has at most six hex digits.
-        if (_index != startHex) {
-          // Parse the hex digits and add that character. Six digits can
-          // exceed the Unicode maximum, which String.fromCharCodes rejects;
-          // CSS 2.1 allows substituting the replacement character U+FFFD.
-          int codePoint =
-              int.parse(_text.substring(startHex, _index), radix: 16);
-          chars.add(codePoint > 0x10FFFF ? 0xFFFD : codePoint);
-
-          if (_index == _text.length) break;
-
-          // If we stopped the hex because of a whitespace char, skip it.
-          ch = _text.codeUnitAt(_index);
-          if (_index - startHex != 6 &&
-              (ch == TokenChar.SPACE || ch == TokenChar.TAB ||
-               ch == TokenChar.RETURN || ch == TokenChar.NEWLINE)) {
-            _index++;
-          }
-        } else {
-          // Not a hex digit, just add the next character literally.
-          if (_index == _text.length) break;
-          chars.add(_text.codeUnitAt(_index++));
-        }
-      } else if (_index < validateFrom || (inSelectorExpression
-          ? TokenizerHelpers.isIdentifierPartExpr(ch)
-          : TokenizerHelpers.isIdentifierPart(ch))) {
-        chars.add(ch);
-        _index++;
-      } else {
-        // Not an identifier or escaped character.
-        break;
-      }
-    }
-
-    var span = _file.span(_startIndex, _index);
-    var text = new String.fromCharCodes(chars);
-
-    return new IdentifierToken(text, getIdentifierKind(), span);
-  }
-
-  /**
-   * Consumes the rest of a number and finishes it as a DOUBLE token when a
-   * '.' followed by a digit is present, or as an INTEGER token otherwise.
-   */
-  Token finishNumber() {
-    eatDigits();
-
-    if (_peekChar() == 46) {
-      // 46 is '.'. It only belongs to the number when a digit follows it.
-      _nextChar();
-      if (TokenizerHelpers.isDigit(_peekChar())) {
-        eatDigits();
-        return _finishToken(TokenKind.DOUBLE);
-      } else {
-        // Give the dot back so it is scanned as a separate token.
-        _index -= 1;
-      }
-    }
-
-    return _finishToken(TokenKind.INTEGER);
-  }
-
-  /** Consumes the next character if it is a decimal digit. */
-  bool maybeEatDigit() {
-    if (_index < _text.length
-        && TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
-      _index += 1;
-      return true;
-    }
-    return false;
-  }
-
-  /** Consumes all following hex digits and finishes a HEX_INTEGER token. */
-  Token finishHexNumber() {
-    eatHexDigits(_text.length);
-    return _finishToken(TokenKind.HEX_INTEGER);
-  }
-
-  /**
-   * Advances _index over hex digits, stopping at the first other character or
-   * at index [end] (clamped to the text length), whichever comes first.
-   */
-  void eatHexDigits(int end) {
-    end = math.min(end, _text.length);
-    while (_index < end) {
-      if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
-        _index += 1;
-      } else {
-        return;
-      }
-    }
-  }
-
-  /** Consumes the next character if it is a hex digit. */
-  bool maybeEatHexDigit() {
-    if (_index < _text.length
-        && TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
-      _index += 1;
-      return true;
-    }
-    return false;
-  }
-
-  /** Consumes the next character if it is a question mark. */
-  bool maybeEatQuestionMark() {
-    if (_index < _text.length &&
-        _text.codeUnitAt(_index) == QUESTION_MARK) {
-      _index += 1;
-      return true;
-    }
-    return false;
-  }
-
-  /** Advances _index over a run of consecutive question marks. */
-  void eatQuestionMarks() {
-    while (_index < _text.length) {
-      if (_text.codeUnitAt(_index) == QUESTION_MARK) {
-        _index += 1;
-      } else {
-        return;
-      }
-    }
-  }
-
-  /** Consumes trailing question marks and finishes a HEX_RANGE token. */
-  Token finishUnicodeRange() {
-    eatQuestionMarks();
-    return _finishToken(TokenKind.HEX_RANGE);
-  }
-
-  /**
-   * Consumes a comment whose opening delimiter has already been consumed.
-   *
-   * Scanning stops at a star-slash or at `-->`. With _skipWhitespace set the
-   * comment is dropped and the following token is returned; otherwise a
-   * COMMENT or HTML_COMMENT token is returned. Running out of input yields an
-   * INCOMPLETE_COMMENT token.
-   *
-   * NOTE(review): either terminator ends the comment regardless of which
-   * opening delimiter started it - confirm that this is intended.
-   */
-  Token finishMultiLineComment() {
-    while (true) {
-      int ch = _nextChar();
-      if (ch == 0) {
-        // Presumably 0 is the end-of-file value returned by _nextChar
-        // (compare TokenChar.END_OF_FILE in next()) - TODO confirm.
-        return _finishToken(TokenKind.INCOMPLETE_COMMENT);
-      } else if (ch == 42) {
-        // 42 is '*'; followed by '/' (47) it closes the comment.
-        if (_maybeEatChar(47)) {
-          if (_skipWhitespace) {
-            return next();
-          } else {
-            return _finishToken(TokenKind.COMMENT);
-          }
-        }
-      } else if (ch == TokenChar.MINUS) {
-        // Check if close part of Comment Definition --> (CDC).
-        if (_maybeEatChar(TokenChar.MINUS)) {
-          if (_maybeEatChar(TokenChar.GREATER)) {
-            if (_skipWhitespace) {
-              return next();
-            } else {
-              return _finishToken(TokenKind.HTML_COMMENT);
-            }
-          }
-        }
-      }
-    }
-  }
-
-}
-
-/** Character classification predicates shared by the tokenizer. */
-class TokenizerHelpers {
-  // Character codes tested by the predicates below.
-  static const int _minus = 45; // -
-  static const int _underscore = 95; // _
-  static const int _backslash = 92;
-  static const int _firstNonAscii = 0xA0;
-
-  /** Whether [c] may begin an identifier; a leading minus is accepted. */
-  static bool isIdentifierStart(int c) =>
-      isIdentifierStartExpr(c) || c == _minus;
-
-  /** Whether [c] is one of the decimal digits 0-9. */
-  static bool isDigit(int c) => 48 <= c && c <= 57;
-
-  /** Whether [c] is a decimal digit or one of a-f / A-F. */
-  static bool isHexDigit(int c) =>
-      isDigit(c) || (97 <= c && c <= 102) || (65 <= c && c <= 70);
-
-  /** Whether [c] may continue an identifier; a minus is accepted. */
-  static bool isIdentifierPart(int c) =>
-      isIdentifierPartExpr(c) || c == _minus;
-
-  /**
-   * Whether [c] may begin an identifier inside a pseudo function expression,
-   * where identifiers can't have a minus sign.
-   *
-   * Accepts ASCII letters, the underscore, any code of U+00A0 or higher and
-   * the backslash (so that escaped characters are allowed), see:
-   * http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
-   * http://www.w3.org/TR/CSS21/syndata.html#characters
-   */
-  static bool isIdentifierStartExpr(int c) =>
-      _isAsciiLetter(c) ||
-      c == _underscore ||
-      c >= _firstNonAscii ||
-      c == _backslash;
-
-  /**
-   * Whether [c] may continue an identifier inside a pseudo function
-   * expression, where identifiers can't have a minus sign.
-   */
-  static bool isIdentifierPartExpr(int c) =>
-      isIdentifierStartExpr(c) || isDigit(c);
-
-  /** Whether [c] is a lower-case or upper-case ASCII letter. */
-  static bool _isAsciiLetter(int c) =>
-      (97 <= c && c <= 122) || (65 <= c && c <= 90);
-}
« no previous file with comments | « pkg/csslib/lib/src/token.dart ('k') | pkg/csslib/lib/src/tokenizer_base.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698