OLD | NEW |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 class Tokenizer extends lang.TokenizerBase { | 5 class Tokenizer extends lang.TokenizerBase { |
6 TokenKind cssTokens; | 6 TokenKind cssTokens; |
7 | 7 |
8 bool _selectorParsing; | 8 bool _selectorParsing; |
9 | 9 |
10 Tokenizer(lang.SourceFile source, bool skipWhitespace, [int index = 0]) | 10 Tokenizer(lang.SourceFile source, bool skipWhitespace, [int index = 0]) |
(...skipping 26 matching lines...) |
37 case cssTokens.tokens[TokenKind.SPACE]: | 37 case cssTokens.tokens[TokenKind.SPACE]: |
38 case cssTokens.tokens[TokenKind.TAB]: | 38 case cssTokens.tokens[TokenKind.TAB]: |
39 case cssTokens.tokens[TokenKind.NEWLINE]: | 39 case cssTokens.tokens[TokenKind.NEWLINE]: |
40 case cssTokens.tokens[TokenKind.RETURN]: | 40 case cssTokens.tokens[TokenKind.RETURN]: |
41 return finishWhitespace(); | 41 return finishWhitespace(); |
42 case cssTokens.tokens[TokenKind.END_OF_FILE]: | 42 case cssTokens.tokens[TokenKind.END_OF_FILE]: |
43 return _finishToken(TokenKind.END_OF_FILE); | 43 return _finishToken(TokenKind.END_OF_FILE); |
44 case cssTokens.tokens[TokenKind.AT]: | 44 case cssTokens.tokens[TokenKind.AT]: |
45 return _finishToken(TokenKind.AT); | 45 return _finishToken(TokenKind.AT); |
46 case cssTokens.tokens[TokenKind.DOT]: | 46 case cssTokens.tokens[TokenKind.DOT]: |
47 return _finishToken(TokenKind.DOT); | 47 int start = _startIndex; // Start where the dot started. |
| 48 if (maybeEatDigit()) { |
| 49 // looks like a number dot followed by digit(s). |
| 50 lang.Token num = finishNumber(); |
| 51 if (num.kind == TokenKind.INTEGER) { |
| 52 // It's a number but it's preceded by a dot, so make it a double. |
| 53 _startIndex = start; |
| 54 return _finishToken(TokenKind.DOUBLE); |
| 55 } else { |
| 56 // Don't allow dot followed by a double (e.g., '..1'). |
| 57 return _errorToken(); |
| 58 } |
| 59 } else { |
| 60 // It's really a dot. |
| 61 return _finishToken(TokenKind.DOT); |
| 62 } |
| 63 case cssTokens.tokens[TokenKind.LPAREN]: |
| 64 return _finishToken(TokenKind.LPAREN); |
| 65 case cssTokens.tokens[TokenKind.RPAREN]: |
| 66 return _finishToken(TokenKind.RPAREN); |
48 case cssTokens.tokens[TokenKind.LBRACE]: | 67 case cssTokens.tokens[TokenKind.LBRACE]: |
49 return _finishToken(TokenKind.LBRACE); | 68 return _finishToken(TokenKind.LBRACE); |
50 case cssTokens.tokens[TokenKind.RBRACE]: | 69 case cssTokens.tokens[TokenKind.RBRACE]: |
51 return _finishToken(TokenKind.RBRACE); | 70 return _finishToken(TokenKind.RBRACE); |
| 71 case cssTokens.tokens[TokenKind.LBRACK]: |
| 72 return _finishToken(TokenKind.LBRACK); |
| 73 case cssTokens.tokens[TokenKind.RBRACK]: |
| 74 return _finishToken(TokenKind.RBRACK); |
52 case cssTokens.tokens[TokenKind.HASH]: | 75 case cssTokens.tokens[TokenKind.HASH]: |
53 return _finishToken(TokenKind.HASH); | 76 return _finishToken(TokenKind.HASH); |
54 case cssTokens.tokens[TokenKind.COMBINATOR_PLUS]: | 77 case cssTokens.tokens[TokenKind.PLUS]: |
55 return _finishToken(TokenKind.COMBINATOR_PLUS); | 78 if (maybeEatDigit()) { |
56 case cssTokens.tokens[TokenKind.COMBINATOR_GREATER]: | 79 return finishNumber(); |
57 return _finishToken(TokenKind.COMBINATOR_GREATER); | 80 } else { |
58 case cssTokens.tokens[TokenKind.COMBINATOR_TILDE]: | 81 return _finishToken(TokenKind.PLUS); |
59 return _finishToken(TokenKind.COMBINATOR_TILDE); | 82 } |
| 83 case cssTokens.tokens[TokenKind.MINUS]: |
| 84 if (maybeEatDigit()) { |
| 85 return finishNumber(); |
| 86 } else if (TokenizerHelpers.isIdentifierStart(ch)) { |
| 87 return this.finishIdentifier(); |
| 88 } else { |
| 89 return _finishToken(TokenKind.MINUS); |
| 90 } |
| 91 case cssTokens.tokens[TokenKind.GREATER]: |
| 92 return _finishToken(TokenKind.GREATER); |
| 93 case cssTokens.tokens[TokenKind.TILDE]: |
| 94 if (_maybeEatChar(cssTokens.tokens[TokenKind.EQUALS])) { |
| 95 return _finishToken(TokenKind.INCLUDES); // ~= |
| 96 } else { |
| 97 return _finishToken(TokenKind.TILDE); |
| 98 } |
60 case cssTokens.tokens[TokenKind.ASTERISK]: | 99 case cssTokens.tokens[TokenKind.ASTERISK]: |
61 return _finishToken(TokenKind.ASTERISK); | 100 if (_maybeEatChar(cssTokens.tokens[TokenKind.EQUALS])) { |
| 101 return _finishToken(TokenKind.SUBSTRING_MATCH); // *= |
| 102 } else { |
| 103 return _finishToken(TokenKind.ASTERISK); |
| 104 } |
62 case cssTokens.tokens[TokenKind.NAMESPACE]: | 105 case cssTokens.tokens[TokenKind.NAMESPACE]: |
63 return _finishToken(TokenKind.NAMESPACE); | 106 return _finishToken(TokenKind.NAMESPACE); |
64 case cssTokens.tokens[TokenKind.PSEUDO]: | 107 case cssTokens.tokens[TokenKind.COLON]: |
65 return _finishToken(TokenKind.PSEUDO); | 108 return _finishToken(TokenKind.COLON); |
66 case cssTokens.tokens[TokenKind.COMMA]: | 109 case cssTokens.tokens[TokenKind.COMMA]: |
67 return _finishToken(TokenKind.COMMA); | 110 return _finishToken(TokenKind.COMMA); |
68 | 111 case cssTokens.tokens[TokenKind.SEMICOLON]: |
| 112 return _finishToken(TokenKind.SEMICOLON); |
| 113 case cssTokens.tokens[TokenKind.PERCENT]: |
| 114 return _finishToken(TokenKind.PERCENT); |
| 115 case cssTokens.tokens[TokenKind.SINGLE_QUOTE]: |
| 116 return _finishToken(TokenKind.SINGLE_QUOTE); |
| 117 case cssTokens.tokens[TokenKind.DOUBLE_QUOTE]: |
| 118 return _finishToken(TokenKind.DOUBLE_QUOTE); |
| 119 case cssTokens.tokens[TokenKind.SLASH]: |
| 120 if (_maybeEatChar(cssTokens.tokens[TokenKind.ASTERISK])) { |
| 121 return finishMultiLineComment(); |
| 122 } else { |
| 123 return _finishToken(TokenKind.SLASH); |
| 124 } |
| 125 case cssTokens.tokens[TokenKind.LESS]: // <!-- |
| 126 if (_maybeEatChar(cssTokens.tokens[TokenKind.BANG]) && |
| 127 _maybeEatChar(cssTokens.tokens[TokenKind.MINUS]) && |
| 128 _maybeEatChar(cssTokens.tokens[TokenKind.MINUS])) { |
| 129 return finishMultiLineComment(); |
| 130 } else { |
| 131 return _finishToken(TokenKind.LESS); |
| 132 } |
| 133 case cssTokens.tokens[TokenKind.EQUALS]: |
| 134 return _finishToken(TokenKind.EQUALS); |
| 135 case cssTokens.tokens[TokenKind.OR]: |
| 136 if (_maybeEatChar(cssTokens.tokens[TokenKind.EQUALS])) { |
| 137 return _finishToken(TokenKind.DASH_MATCH); // |= |
| 138 } else { |
| 139 return _finishToken(TokenKind.OR); |
| 140 } |
| 141 case cssTokens.tokens[TokenKind.CARET]: |
| 142 if (_maybeEatChar(cssTokens.tokens[TokenKind.EQUALS])) { |
| 143 return _finishToken(TokenKind.PREFIX_MATCH); // ^= |
| 144 } else { |
| 145 return _finishToken(TokenKind.CARET); |
| 146 } |
| 147 case cssTokens.tokens[TokenKind.DOLLAR]: |
| 148 if (_maybeEatChar(cssTokens.tokens[TokenKind.EQUALS])) { |
| 149 return _finishToken(TokenKind.SUFFIX_MATCH); // $= |
| 150 } else { |
| 151 return _finishToken(TokenKind.DOLLAR); |
| 152 } |
| 153 case cssTokens.tokens[TokenKind.BANG]: |
| 154 lang.Token tok = finishIdentifier(); |
| 155 return (tok == null) ? _finishToken(TokenKind.BANG) : tok; |
69 default: | 156 default: |
70 if (isIdentifierStart(ch)) { | 157 if (TokenizerHelpers.isIdentifierStart(ch)) { |
71 return this.finishIdentifier(); | 158 return this.finishIdentifier(); |
72 } else if (isDigit(ch)) { | 159 } else if (isDigit(ch)) { |
73 return this.finishNumber(); | 160 return this.finishNumber(); |
74 } else { | 161 } else { |
75 return _errorToken(); | 162 return _errorToken(); |
76 } | 163 } |
77 } | 164 } |
78 } | 165 } |
79 | 166 |
80 // TODO(jmesserly): we need a way to emit human readable error messages from | 167 // TODO(jmesserly): we need a way to emit human readable error messages from |
81 // the tokenizer. | 168 // the tokenizer. |
82 lang.Token _errorToken() { | 169 lang.Token _errorToken() { |
83 return _finishToken(TokenKind.ERROR); | 170 return _finishToken(TokenKind.ERROR); |
84 } | 171 } |
85 | 172 |
86 int getIdentifierKind() { | 173 int getIdentifierKind() { |
87 return TokenKind.IDENTIFIER; | 174 // Is the identifier a unit type? |
| 175 int tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex); |
| 176 if (tokId == -1) { |
| 177 // No, is it a directive? |
| 178 tokId = TokenKind.matchDirectives(_text, _startIndex, _index - _startIndex); |
| 179 } |
| 180 if (tokId == -1) { |
| 181 tokId = (_text.substring(_startIndex, _index) == '!important') ? |
| 182 TokenKind.IMPORTANT : -1; |
| 183 } |
| 184 |
| 185 return tokId >= 0 ? tokId : TokenKind.IDENTIFIER; |
88 } | 186 } |
89 | 187 |
90 // Need to override so CSS version of isIdentifierPart is used. | 188 // Need to override so CSS version of isIdentifierPart is used. |
91 lang.Token finishIdentifier() { | 189 lang.Token finishIdentifier() { |
92 while (_index < _text.length) { | 190 while (_index < _text.length) { |
93 if (!TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index++))) { | 191 // if (!TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index++))) { |
94 _index--; | 192 if (!TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index))) { |
| 193 // _index--; |
95 break; | 194 break; |
| 195 } else { |
| 196 _index += 1; |
96 } | 197 } |
97 } | 198 } |
98 int kind = getIdentifierKind(); | |
99 if (_interpStack != null && _interpStack.depth == -1) { | 199 if (_interpStack != null && _interpStack.depth == -1) { |
100 _interpStack.depth = 0; | 200 _interpStack.depth = 0; |
101 } | 201 } |
| 202 int kind = getIdentifierKind(); |
102 if (kind == TokenKind.IDENTIFIER) { | 203 if (kind == TokenKind.IDENTIFIER) { |
103 return _finishToken(TokenKind.IDENTIFIER); | 204 return _finishToken(TokenKind.IDENTIFIER); |
104 } else { | 205 } else { |
105 return _finishToken(kind); | 206 return _finishToken(kind); |
106 } | 207 } |
107 } | 208 } |
| 209 |
| 210 lang.Token finishImportant() { |
| 211 |
| 212 } |
| 213 |
| 214 lang.Token finishNumber() { |
| 215 eatDigits(); |
| 216 |
| 217 if (_peekChar() == 46/*.*/) { |
| 218 // Handle the case of 1.toString(). |
| 219 _nextChar(); |
| 220 if (isDigit(_peekChar())) { |
| 221 eatDigits(); |
| 222 return _finishToken(TokenKind.DOUBLE); |
| 223 } else { |
| 224 _index -= 1; |
| 225 } |
| 226 } |
| 227 |
| 228 return _finishToken(TokenKind.INTEGER); |
| 229 } |
| 230 |
| 231 bool maybeEatDigit() { |
| 232 if (_index < _text.length && isDigit(_text.charCodeAt(_index))) { |
| 233 _index += 1; |
| 234 return true; |
| 235 } |
| 236 return false; |
| 237 } |
| 238 |
| 239 void eatHexDigits() { |
| 240 while (_index < _text.length) { |
| 241 if (isHexDigit(_text.charCodeAt(_index))) { |
| 242 _index += 1; |
| 243 } else { |
| 244 return; |
| 245 } |
| 246 } |
| 247 } |
| 248 |
| 249 bool maybeEatHexDigit() { |
| 250 if (_index < _text.length && isHexDigit(_text.charCodeAt(_index))) { |
| 251 _index += 1; |
| 252 return true; |
| 253 } |
| 254 return false; |
| 255 } |
| 256 |
| 257 lang.Token finishMultiLineComment() { |
| 258 while (true) { |
| 259 int ch = _nextChar(); |
| 260 if (ch == 0) { |
| 261 return _finishToken(TokenKind.INCOMPLETE_COMMENT); |
| 262 } else if (ch == 42/*'*'*/) { |
| 263 if (_maybeEatChar(47/*'/'*/)) { |
| 264 if (_skipWhitespace) { |
| 265 return next(); |
| 266 } else { |
| 267 return _finishToken(TokenKind.COMMENT); |
| 268 } |
| 269 } |
| 270 } else if (ch == cssTokens.tokens[TokenKind.MINUS]) { |
| 271 /* Check if closing HTML comment delimiter --> (CDC, Comment Delimiter Close). */ |
| 272 if (_maybeEatChar(cssTokens.tokens[TokenKind.MINUS])) { |
| 273 if (_maybeEatChar(cssTokens.tokens[TokenKind.GREATER])) { |
| 274 if (_skipWhitespace) { |
| 275 return next(); |
| 276 } else { |
| 277 return _finishToken(TokenKind.HTML_COMMENT); |
| 278 } |
| 279 } |
| 280 } |
| 281 } |
| 282 } |
| 283 return _errorToken(); |
| 284 } |
| 285 |
108 } | 286 } |
109 | 287 |
110 /** Static helper methods. */ | 288 /** Static helper methods. */ |
111 class TokenizerHelpers { | 289 class TokenizerHelpers { |
112 static bool isIdentifierStart(int c) => | 290 static bool isIdentifierStart(int c) => |
113 lang.TokenizerHelpers.isIdentifierStart(c) || c == 95 /*_*/; | 291 lang.TokenizerHelpers.isIdentifierStart(c) || c == 95 /*_*/ || |
| 292 c == 45; /*-*/ |
114 | 293 |
115 static bool isDigit(int c) => lang.TokenizerHelpers.isDigit(c); | 294 static bool isDigit(int c) => lang.TokenizerHelpers.isDigit(c); |
116 | 295 |
117 static bool isHexDigit(int c) => lang.TokenizerHelpers.isHexDigit(c); | 296 static bool isHexDigit(int c) => lang.TokenizerHelpers.isHexDigit(c); |
118 | 297 |
119 static bool isWhitespace(int c) => lang.TokenizerHelpers.isWhitespace(c); | 298 static bool isWhitespace(int c) => lang.TokenizerHelpers.isWhitespace(c); |
120 | 299 |
121 static bool isIdentifierPart(int c) => | 300 static bool isIdentifierPart(int c) => |
122 lang.TokenizerHelpers.isIdentifierPart(c) || c == 45 /*-*/; | 301 lang.TokenizerHelpers.isIdentifierPart(c) || c == 45 /*-*/; |
123 } | 302 } |
OLD | NEW |