Chromium Code Reviews| Index: utils/css/tokenizer.dart |
| diff --git a/utils/css/tokenizer.dart b/utils/css/tokenizer.dart |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..8446fe17ebc6b3d1597cd9beba97a72503677edb |
| --- /dev/null |
| +++ b/utils/css/tokenizer.dart |
| @@ -0,0 +1,134 @@ |
| +// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
| +// for details. All rights reserved. Use of this source code is governed by a |
| +// BSD-style license that can be found in the LICENSE file. |
| + |
| +class Tokenizer extends lang.TokenizerBase { |
| + // CSS token-kind table; next() reads its `tokens` entries to match |
| + // single-character tokens. |
| + TokenKind cssTokens; |
| + |
| + // Forwards source, skipWhitespace and the optional start index (default 0) |
| + // to the lang.TokenizerBase constructor, then creates the TokenKind table. |
| + Tokenizer(lang.SourceFile source, bool skipWhitespace, [int index = 0]) |
| + : super(source, skipWhitespace, index) { |
| + cssTokens = new TokenKind(); |
| + } |
| + |
| + // Returns the next token. Records the start position, reads one character |
| + // with _nextChar() and dispatches on its char code: 0 ends the file, |
| + // whitespace goes to finishWhitespace(), the listed single-character tokens |
| + // are finished directly, and anything else is tried as an identifier, then |
| + // as a number, and otherwise becomes an error token. |
| + lang.Token next() { |
| + // keep track of our starting position |
| + _startIndex = _index; |
| + |
| + // When an interpolation stack exists and its depth is 0, pop it. The code |
| + // that would resume the string body is commented out below, so the saved |
| + // entry (istack) is currently unused and scanning continues normally. |
| + // NOTE(review): confirm popping without resuming the string is intended. |
| + if (_interpStack != null && _interpStack.depth == 0) { |
| + var istack = _interpStack; |
| + _interpStack = _interpStack.pop(); |
| +/* |

jimhug
2011/11/10 17:58:56
Is this relevant at all for css?
terry
2011/11/16 14:00:22
I think we might want this for string interpolatio
|
| + if (istack.isMultiline) { |
| + return finishMultilineStringBody(istack.quote); |
| + } else { |
| + return finishStringBody(istack.quote); |
| + } |
| +*/ |
| + } |
| + |
| + int ch; |
| + ch = _nextChar(); |
| + switch(ch) { |
| + // A char code of 0 is reported as end of file. |
| + case 0: |
| + return _finishToken(TokenKind.END_OF_FILE); |
| + // Space, tab, newline and carriage return all start a whitespace run. |
| + case TokenizerHelpers.WS_SPACE: |
| + case TokenizerHelpers.WS_TAB: |
| + case TokenizerHelpers.WS_NEWLINE: |
| + case TokenizerHelpers.WS_RETURN: |
| + return finishWhitespace(); |
| + // Single-character tokens: ch is compared with the entry stored in |
| + // cssTokens.tokens for each kind (presumably that kind's char code). |
| + // NOTE(review): these case labels are runtime lookups, not constants; |
| + // confirm the compiler accepts them. |
| + // NOTE(review): END_OF_FILE is already handled by `case 0` above; confirm |
| + // the table entry differs from 0, otherwise this case is unreachable. |
| + case cssTokens.tokens[TokenKind.END_OF_FILE]: |
| + return _finishToken(TokenKind.END_OF_FILE); |
| + case cssTokens.tokens[TokenKind.AT]: |
| + return _finishToken(TokenKind.AT); |
| + case cssTokens.tokens[TokenKind.DOT]: |
| + return _finishToken(TokenKind.DOT); |
| + case cssTokens.tokens[TokenKind.LBRACE]: |
| + return _finishToken(TokenKind.LBRACE); |
| + case cssTokens.tokens[TokenKind.RBRACE]: |
| + return _finishToken(TokenKind.RBRACE); |
| + case cssTokens.tokens[TokenKind.HASH]: |
| + return _finishToken(TokenKind.HASH); |
| + case cssTokens.tokens[TokenKind.COMBINATOR_PLUS]: |
| + return _finishToken(TokenKind.COMBINATOR_PLUS); |
| + case cssTokens.tokens[TokenKind.COMBINATOR_GREATER]: |
| + return _finishToken(TokenKind.COMBINATOR_GREATER); |
| + case cssTokens.tokens[TokenKind.COMBINATOR_TILDE]: |
| + return _finishToken(TokenKind.COMBINATOR_TILDE); |
| + case cssTokens.tokens[TokenKind.ASTERISK]: |
| + return _finishToken(TokenKind.ASTERISK); |
| + case cssTokens.tokens[TokenKind.NAMESPACE]: |
| + return _finishToken(TokenKind.NAMESPACE); |
| + case cssTokens.tokens[TokenKind.PSEUDO]: |
| + return _finishToken(TokenKind.PSEUDO); |
| + case cssTokens.tokens[TokenKind.COMMA]: |
| + return _finishToken(TokenKind.COMMA); |
| + |
| + // Not a fixed token: try an identifier, then a number, else an error. |
| + default: |
| + if (isIdentifierStart(ch)) { |
| + return this.finishIdentifier(); |
| + } else if (isDigit(ch)) { |
| + return this.finishNumber(); |
| + } else { |
| + return _errorToken(); |
| + } |
| + } |
| + } |
| + |
| + // TODO(jmesserly): we need a way to emit human readable error messages from |
| + // the tokenizer. |
| + /** Finishes the current token with kind TokenKind.ERROR. */ |
| + lang.Token _errorToken() => _finishToken(TokenKind.ERROR); |
| + |
| + /** Kind assigned to a scanned identifier; always TokenKind.IDENTIFIER. */ |
| + int getIdentifierKind() => TokenKind.IDENTIFIER; |
| + |
| + // Overridden so that the CSS-specific TokenizerHelpers.isIdentifierPart |
| + // decides where an identifier ends. |
| + lang.Token finishIdentifier() { |
| +   // Step past every character that may continue an identifier; _index stops |
| +   // on the first character that may not, or at the end of the text. |
| +   while (_index < _text.length && |
| +       TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index))) { |
| +     _index++; |
| +   } |
| + |
| +   int kind = getIdentifierKind(); |
| + |
| +   // An interpolation stack left at depth -1 is reset to depth 0 once the |
| +   // identifier has been consumed. |
| +   if (_interpStack != null && _interpStack.depth == -1) { |
| +     _interpStack.depth = 0; |
| +   } |
| + |
| +   // Whatever kind was reported is passed straight to _finishToken. |
| +   return _finishToken(kind); |
| + } |
| + |
| +} |
| + |
| +/** Static helper methods. */ |
| +class TokenizerHelpers { |
|
jimhug
2011/11/10 17:58:56
Any reason not to use lang.TokenizerHelpers here?
terry
2011/11/16 14:00:22
Good point.
Done.
|
| + // Char codes of the four whitespace characters; Tokenizer.next() uses them |
| + // as case labels to detect the start of a whitespace run. |
| + final static int WS_SPACE = 32; // ' ' |
| + final static int WS_TAB = 9; // '\t' |
| + final static int WS_NEWLINE = 10; // '\n' |
| + final static int WS_RETURN = 13; // '\r' |
| + |
| + /** True when [c] is the char code of an ASCII letter or an underscore. */ |
| + static bool isIdentifierStart(int c) { |
| +   if (c == 95/*_*/) return true; |
| +   if (c >= 65/*A*/ && c <= 90/*Z*/) return true; |
| +   return c >= 97/*a*/ && c <= 122/*z*/; |
| + } |
| + |
| + /** True when [c] is the char code of a decimal digit, '0' through '9'. */ |
| + static bool isDigit(int c) => c >= 48/*0*/ && c <= 57/*9*/; |
| + |
| + /** True when [c] is the char code of a hex digit: 0-9, a-f or A-F. */ |
| + static bool isHexDigit(int c) { |
| +   if (isDigit(c)) return true; |
| +   if (c >= 97/*a*/ && c <= 102/*f*/) return true; |
| +   return c >= 65/*A*/ && c <= 70/*F*/; |
| + } |
| + |
| + /** |
| +  * True when [c] is the char code of a space, tab, newline or carriage |
| +  * return. Uses the WS_* constants of this class so the whitespace set is |
| +  * defined in one place. |
| +  */ |
| + static bool isWhitespace(int c) { |
| +   return c == WS_SPACE || c == WS_TAB || c == WS_NEWLINE || c == WS_RETURN; |
| + } |
| + |
| + /** |
| +  * True when [c] may continue an identifier: anything accepted by |
| +  * isIdentifierStart, a decimal digit, or a hyphen. |
| +  */ |
| + static bool isIdentifierPart(int c) { |
| +   if (c == 45/*-*/) return true; |
| +   return isIdentifierStart(c) || isDigit(c); |
| + } |
| +} |