OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 part of csslib.parser; |
| 6 |
| 7 class Tokenizer extends TokenizerBase { |
/**
 * Code units of the characters that introduce a unicode range: an upper or
 * lower case 'u' followed by '+'.
 */
final int UNICODE_U = 'U'.codeUnitAt(0);
final int UNICODE_LOWER_U = 'u'.codeUnitAt(0);
final int UNICODE_PLUS = '+'.codeUnitAt(0);

/** Code unit of '?', the wildcard digit allowed at the end of a hex range. */
final int QUESTION_MARK = '?'.codeUnitAt(0);

/** Code units of the CDATA keyword, matched one by one after `<![`. */
final List<int> CDATA_NAME = 'CDATA'.codeUnits;
| 17 |
/**
 * Creates a CSS tokenizer.
 *
 * All four arguments are forwarded unchanged to the TokenizerBase
 * constructor; [index] is optional and defaults to 0.
 * NOTE(review): presumably [index] is the offset in [text] at which scanning
 * starts -- confirm against TokenizerBase.
 */
Tokenizer(SourceFile file, String text, bool skipWhitespace,
    [int index = 0])
    : super(file, text, skipWhitespace, index);
| 21 |
/**
 * Scans one token starting at the current position and returns it.
 *
 * The current index is recorded as the token start, one character is read
 * and the token kind is chosen from that character. Longer tokens (numbers,
 * identifiers, @-directives, two character match operators, comments) consume
 * the additional characters they need. Input that matches nothing produces
 * an error token.
 *
 * When [unicodeRange] is true the caller is scanning the body of a unicode
 * range: '-' is always returned as a MINUS token, and any character without
 * an explicit case is scanned as a HEX_INTEGER or, when question marks are
 * present, as a HEX_RANGE.
 */
Token next({unicodeRange: false}) {
  // Keep track of our starting position.
  _startIndex = _index;

  int ch;
  ch = _nextChar();
  switch (ch) {
    case TokenChar.NEWLINE:
    case TokenChar.RETURN:
    case TokenChar.SPACE:
    case TokenChar.TAB:
      return finishWhitespace();
    case TokenChar.END_OF_FILE:
      return _finishToken(TokenKind.END_OF_FILE);
    case TokenChar.AT:
      int peekCh = _peekChar();
      if (TokenizerHelpers.isIdentifierStart(peekCh)) {
        // Remember where we were so the scan can be undone if the name
        // after '@' turns out not to be a known directive.
        var oldIndex = _index;
        var oldStartIndex = _startIndex;

        _startIndex = _index;
        ch = _nextChar();
        // Only the index advance matters here; the identifier token itself
        // is not used.
        this.finishIdentifier(ch);

        // Is it a directive?
        int tokId = TokenKind.matchDirectives(_text, _startIndex,
            _index - _startIndex);
        if (tokId == -1) {
          // No, is it a margin directive?
          tokId = TokenKind.matchMarginDirectives(_text, _startIndex,
              _index - _startIndex);
        }

        if (tokId != -1) {
          return _finishToken(tokId);
        } else {
          // Didn't find a CSS directive or margin directive so the @name is
          // probably the Less definition '@name: value_variable_definition'.
          _startIndex = oldStartIndex;
          _index = oldIndex;
        }
      }
      return _finishToken(TokenKind.AT);
    case TokenChar.DOT:
      int start = _startIndex;            // Start where the dot started.
      if (maybeEatDigit()) {
        // Looks like a number: dot followed by digit(s).
        Token number = finishNumber();
        if (number.kind == TokenKind.INTEGER) {
          // It's a number but it's preceded by a dot, so make it a double.
          _startIndex = start;
          return _finishToken(TokenKind.DOUBLE);
        } else {
          // Don't allow a dot followed by a double (e.g. '.1.5').
          return _errorToken();
        }
      }
      // It's really a dot.
      return _finishToken(TokenKind.DOT);
    case TokenChar.LPAREN:
      return _finishToken(TokenKind.LPAREN);
    case TokenChar.RPAREN:
      return _finishToken(TokenKind.RPAREN);
    case TokenChar.LBRACE:
      return _finishToken(TokenKind.LBRACE);
    case TokenChar.RBRACE:
      return _finishToken(TokenKind.RBRACE);
    case TokenChar.LBRACK:
      return _finishToken(TokenKind.LBRACK);
    case TokenChar.RBRACK:
      if (_maybeEatChar(TokenChar.RBRACK) &&
          _maybeEatChar(TokenChar.GREATER)) {
        // ]]> closes a CDATA section; skip it and scan the next token.
        return next();
      }
      return _finishToken(TokenKind.RBRACK);
    case TokenChar.HASH:
      return _finishToken(TokenKind.HASH);
    case TokenChar.PLUS:
      if (maybeEatDigit()) return finishNumber();
      return _finishToken(TokenKind.PLUS);
    case TokenChar.MINUS:
      if (selectorExpression || unicodeRange) {
        // If parsing in pseudo function expression then minus is an operator
        // not part of identifier e.g., interval value range (e.g. U+400-4ff)
        // or minus operator in selector expression.
        return _finishToken(TokenKind.MINUS);
      } else if (maybeEatDigit()) {
        return finishNumber();
      } else if (TokenizerHelpers.isIdentifierStart(ch)) {
        return this.finishIdentifier(ch);
      }
      return _finishToken(TokenKind.MINUS);
    case TokenChar.GREATER:
      return _finishToken(TokenKind.GREATER);
    case TokenChar.TILDE:
      if (_maybeEatChar(TokenChar.EQUALS)) {
        return _finishToken(TokenKind.INCLUDES);          // ~=
      }
      return _finishToken(TokenKind.TILDE);
    case TokenChar.ASTERISK:
      if (_maybeEatChar(TokenChar.EQUALS)) {
        return _finishToken(TokenKind.SUBSTRING_MATCH);   // *=
      }
      return _finishToken(TokenKind.ASTERISK);
    case TokenChar.AMPERSAND:
      return _finishToken(TokenKind.AMPERSAND);
    case TokenChar.NAMESPACE:
      return _finishToken(TokenKind.NAMESPACE);
    case TokenChar.COLON:
      return _finishToken(TokenKind.COLON);
    case TokenChar.COMMA:
      return _finishToken(TokenKind.COMMA);
    case TokenChar.SEMICOLON:
      return _finishToken(TokenKind.SEMICOLON);
    case TokenChar.PERCENT:
      return _finishToken(TokenKind.PERCENT);
    case TokenChar.SINGLE_QUOTE:
      return _finishToken(TokenKind.SINGLE_QUOTE);
    case TokenChar.DOUBLE_QUOTE:
      return _finishToken(TokenKind.DOUBLE_QUOTE);
    case TokenChar.SLASH:
      if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment();
      return _finishToken(TokenKind.SLASH);
    case TokenChar.LESS:      // <!--
      if (_maybeEatChar(TokenChar.BANG)) {
        if (_maybeEatChar(TokenChar.MINUS) &&
            _maybeEatChar(TokenChar.MINUS)) {
          return finishMultiLineComment();
        } else if (_maybeEatChar(TokenChar.LBRACK) &&
            _maybeEatChar(CDATA_NAME[0]) &&
            _maybeEatChar(CDATA_NAME[1]) &&
            _maybeEatChar(CDATA_NAME[2]) &&
            _maybeEatChar(CDATA_NAME[3]) &&
            _maybeEatChar(CDATA_NAME[4]) &&
            _maybeEatChar(TokenChar.LBRACK)) {
          // <![CDATA[ opens a CDATA section; skip it and scan the next token.
          return next();
        }
      }
      return _finishToken(TokenKind.LESS);
    case TokenChar.EQUALS:
      return _finishToken(TokenKind.EQUALS);
    case TokenChar.OR:
      if (_maybeEatChar(TokenChar.EQUALS)) {
        return _finishToken(TokenKind.DASH_MATCH);      // |=
      }
      return _finishToken(TokenKind.OR);
    case TokenChar.CARET:
      if (_maybeEatChar(TokenChar.EQUALS)) {
        return _finishToken(TokenKind.PREFIX_MATCH);    // ^=
      }
      return _finishToken(TokenKind.CARET);
    case TokenChar.DOLLAR:
      if (_maybeEatChar(TokenChar.EQUALS)) {
        return _finishToken(TokenKind.SUFFIX_MATCH);    // $=
      }
      return _finishToken(TokenKind.DOLLAR);
    case TokenChar.BANG:
      // finishIdentifier as defined in this class always returns a token,
      // so the BANG fallback is only reached if an override returns null.
      Token tok = finishIdentifier(ch);
      return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
    case TokenChar.BACKSLASH:
      return _finishToken(TokenKind.BACKSLASH);
    default:
      if (unicodeRange) {
        // Three types of unicode ranges:
        //   - single code point (e.g. U+416)
        //   - interval value range (e.g. U+400-4ff)
        //   - range where trailing '?' characters imply 'any digit value'
        //     (e.g. U+4??)
        if (maybeEatHexDigit()) {
          eatHexDigits();
          // Any question marks then it's a HEX_RANGE not HEX_NUMBER; return
          // the range token so its kind and extent include the '?'s.
          if (maybeEatQuestionMark()) return finishUnicodeRange();
          return _finishToken(TokenKind.HEX_INTEGER);
        } else if (maybeEatQuestionMark()) {
          // HEX_RANGE U+N???
          return finishUnicodeRange();
        } else {
          return _errorToken();
        }
      } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) &&
          (_peekChar() == UNICODE_PLUS)) {
        // Unicode range: U+uNumber[-U+uNumber]
        //   uNumber = 0..10FFFF
        _nextChar();                                // Skip +
        _startIndex = _index;                       // Starts at the number
        return _finishToken(TokenKind.UNICODE_RANGE);
      } else if (varDef(ch)) {
        return _finishToken(TokenKind.VAR_DEFINITION);
      } else if (varUsage(ch)) {
        return _finishToken(TokenKind.VAR_USAGE);
      } else if (TokenizerHelpers.isIdentifierStart(ch)) {
        return finishIdentifier(ch);
      } else if (TokenizerHelpers.isDigit(ch)) {
        return finishNumber();
      }
      return _errorToken();
  }
}
| 222 |
/**
 * Returns true when [ch] is 'v' and the input continues with "ar-", i.e. the
 * text at the token start is the prefix "var-".
 *
 * The checks run left to right and stop at the first mismatch.
 * NOTE(review): presumably _maybeEatChar consumes the character when it
 * matches; if so, characters matched before a mismatch stay consumed --
 * confirm against TokenizerBase.
 */
bool varDef(int ch) {
  if (ch != 'v'.codeUnitAt(0)) return false;
  if (!_maybeEatChar('a'.codeUnitAt(0))) return false;
  if (!_maybeEatChar('r'.codeUnitAt(0))) return false;
  return _maybeEatChar('-'.codeUnitAt(0));
}
| 227 |
/**
 * Returns true when [ch] is 'v', the input continues with "ar" and the
 * character after that is '-'. The '-' is only peeked at, not consumed.
 *
 * The checks run left to right and stop at the first mismatch.
 * NOTE(review): next() only tries this after varDef has failed; if
 * _maybeEatChar consumes on match, varDef may already have eaten the "ar"
 * this method looks for -- confirm whether this can ever succeed there.
 */
bool varUsage(int ch) {
  if (ch != 'v'.codeUnitAt(0)) return false;
  if (!_maybeEatChar('a'.codeUnitAt(0))) return false;
  if (!_maybeEatChar('r'.codeUnitAt(0))) return false;
  return _peekChar() == '-'.codeUnitAt(0);
}
| 232 |
/**
 * Finishes the current token as an ERROR token.
 *
 * [message] is accepted but not used by this implementation.
 */
Token _errorToken([String message = null]) => _finishToken(TokenKind.ERROR);
| 236 |
/**
 * Classifies the text between the token start and the current index.
 *
 * Returns the unit kind when TokenKind.matchUnits recognizes the text,
 * TokenKind.IMPORTANT when the text is exactly '!important', and
 * TokenKind.IDENTIFIER otherwise (including any negative match result).
 */
int getIdentifierKind() {
  var length = _index - _startIndex;

  // Is the identifier a unit type?
  int kind = TokenKind.matchUnits(_text, _startIndex, length);

  // -1 means "no unit matched"; the only other special word is !important.
  if (kind == -1 && _text.substring(_startIndex, _index) == '!important') {
    kind = TokenKind.IMPORTANT;
  }

  if (kind < 0) return TokenKind.IDENTIFIER;
  return kind;
}
| 247 |
/**
 * Consumes the rest of an identifier and returns its token.
 *
 * Overridden so the CSS rules for identifier characters are used. While
 * [selectorExpression] is set, '-' ends the identifier because it is an
 * operator inside pseudo function expressions. The token kind comes from
 * [getIdentifierKind]. [ch] is not used by this implementation.
 */
Token finishIdentifier(int ch) {
  for (; _index < _text.length; _index += 1) {
    var current = _text.codeUnitAt(_index);
    var partOfName = selectorExpression
        ? TokenizerHelpers.isIdentifierPartExpr(current)
        : TokenizerHelpers.isIdentifierPart(current);
    // Stop on the first character that cannot continue the identifier; it
    // is left unconsumed.
    if (!partOfName) break;
  }

  return _finishToken(getIdentifierKind());
}
| 270 |
/**
 * Unimplemented stub: does no scanning and returns null.
 *
 * '!important' is recognized in [getIdentifierKind] instead.
 */
Token finishImportant() {
  return null;
}
| 274 |
/**
 * Consumes the remainder of a number and returns its token.
 *
 * Returns a DOUBLE when the digits are followed by '.' and at least one more
 * digit; otherwise returns an INTEGER and leaves any '.' unconsumed.
 */
Token finishNumber() {
  eatDigits();

  if (_peekChar() != 46/*.*/) {
    return _finishToken(TokenKind.INTEGER);
  }

  // Tentatively take the dot; it only belongs to the number when a digit
  // follows it.
  _nextChar();
  if (TokenizerHelpers.isDigit(_peekChar())) {
    eatDigits();
    return _finishToken(TokenKind.DOUBLE);
  }

  // Not a fraction: give the dot back.
  _index -= 1;
  return _finishToken(TokenKind.INTEGER);
}
| 291 |
/**
 * Consumes the current character if it is a decimal digit.
 *
 * Returns true when a digit was consumed, false at end of input or when the
 * current character is not a digit (nothing is consumed then).
 */
bool maybeEatDigit() {
  if (_index >= _text.length) return false;
  if (!TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) return false;

  _index += 1;
  return true;
}
| 300 |
/**
 * Consumes any hex digits at the current position and finishes the current
 * token as a HEX_INTEGER.
 */
Token finishHexNumber() {
  eatHexDigits();
  return _finishToken(TokenKind.HEX_INTEGER);
}
| 305 |
/**
 * Advances past a run of zero or more hex digits, stopping at the first
 * character that is not a hex digit or at end of input.
 */
void eatHexDigits() {
  while (_index < _text.length &&
      TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
    _index += 1;
  }
}
| 315 |
/**
 * Consumes the current character if it is a hex digit.
 *
 * Returns true when a hex digit was consumed, false at end of input or when
 * the current character is not a hex digit (nothing is consumed then).
 */
bool maybeEatHexDigit() {
  if (_index >= _text.length) return false;
  if (!TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) return false;

  _index += 1;
  return true;
}
| 324 |
/**
 * Consumes the current character if it is '?'.
 *
 * Returns true when a '?' was consumed, false at end of input or when the
 * current character is something else (nothing is consumed then).
 */
bool maybeEatQuestionMark() {
  if (_index >= _text.length) return false;
  if (_text.codeUnitAt(_index) != QUESTION_MARK) return false;

  _index += 1;
  return true;
}
| 333 |
/**
 * Advances past a run of zero or more '?' characters, stopping at the first
 * other character or at end of input.
 */
void eatQuestionMarks() {
  while (_index < _text.length &&
      _text.codeUnitAt(_index) == QUESTION_MARK) {
    _index += 1;
  }
}
| 343 |
/**
 * Consumes any '?' characters at the current position and finishes the
 * current token as a HEX_RANGE.
 */
Token finishUnicodeRange() {
  eatQuestionMarks();
  return _finishToken(TokenKind.HEX_RANGE);
}
| 348 |
/// Scans to the end of a comment whose opening delimiter has already been
/// consumed and returns the resulting token.
///
/// The comment ends at the first `*/` (COMMENT) or `-->` (HTML_COMMENT),
/// whichever appears first, regardless of which delimiter opened it. When
/// [_skipWhitespace] is set the comment is dropped and the token that follows
/// it is returned instead. If `_nextChar` yields 0 before a closer is found
/// an INCOMPLETE_COMMENT token is returned.
/// NOTE(review): 0 is presumably what `_nextChar` returns at end of input --
/// confirm against TokenizerBase.
Token finishMultiLineComment() {
  while (true) {
    int ch = _nextChar();
    if (ch == 0) {
      return _finishToken(TokenKind.INCOMPLETE_COMMENT);
    } else if (ch == 42/*'*'*/) {
      if (_maybeEatChar(47/*'/'*/)) {
        if (_skipWhitespace) {
          return next();
        } else {
          return _finishToken(TokenKind.COMMENT);
        }
      }
    } else if (ch == TokenChar.MINUS) {
      // Check if close part of Comment Definition --> (CDC).
      if (_maybeEatChar(TokenChar.MINUS)) {
        // Swallow any further dashes so a closer preceded by extra dashes
        // (e.g. '--->') is still recognized; otherwise the first two dashes
        // would be consumed as a pair and the real closer missed.
        while (_maybeEatChar(TokenChar.MINUS)) {}
        if (_maybeEatChar(TokenChar.GREATER)) {
          if (_skipWhitespace) {
            return next();
          } else {
            return _finishToken(TokenKind.HTML_COMMENT);
          }
        }
      }
    }
  }
}
| 377 |
| 378 } |
| 379 |
/** Character classification helpers; every method takes a UTF-16 code unit. */
class TokenizerHelpers {
  /** True when [c] can begin an identifier: a letter, '_' or '-'. */
  static bool isIdentifierStart(int c) =>
      isIdentifierStartExpr(c) || c == 45 /*-*/;

  /** True when [c] is a decimal digit '0'..'9'. */
  static bool isDigit(int c) => c >= 48/*0*/ && c <= 57/*9*/;

  /** True when [c] is a hex digit: '0'..'9', 'a'..'f' or 'A'..'F'. */
  static bool isHexDigit(int c) {
    if (isDigit(c)) return true;
    if (c >= 97/*a*/ && c <= 102/*f*/) return true;
    return c >= 65/*A*/ && c <= 70/*F*/;
  }

  /** True when [c] can continue an identifier: letter, digit, '_' or '-'. */
  static bool isIdentifierPart(int c) =>
      isIdentifierPartExpr(c) || c == 45 /*-*/;

  /**
   * True when [c] can begin an identifier inside a pseudo function
   * expression, where a minus sign is not allowed: a letter or '_'.
   */
  static bool isIdentifierStartExpr(int c) {
    if (c >= 97/*a*/ && c <= 122/*z*/) return true;
    if (c >= 65/*A*/ && c <= 90/*Z*/) return true;
    return c == 95/*_*/;
  }

  /**
   * True when [c] can continue an identifier inside a pseudo function
   * expression, where a minus sign is not allowed: a letter, digit or '_'.
   */
  static bool isIdentifierPartExpr(int c) =>
      isIdentifierStartExpr(c) || isDigit(c);
}
OLD | NEW |