| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
/**
 * Tokenizer for the template language. Scans the text held by
 * [TokenizerBase] and produces tokens for punctuation, identifiers, numbers,
 * attribute values, comments and the `${`, `${#` and `${/` markers.
 */
class Tokenizer extends TokenizerBase {
  /** Lookup table from token kind to the character code that spells it. */
  TokenKind tmplTokens;

  // Initialized to false by the constructor; nothing in this class reads it.
  bool _selectorParsing;

  /**
   * Creates a tokenizer over [source]. [skipWhitespace] and [index] are
   * forwarded unchanged to the [TokenizerBase] constructor.
   */
  Tokenizer(SourceFile source, bool skipWhitespace, [int index = 0])
      // The super call is kept last in the initializer list, as Dart 2
      // requires; the field initializer does not depend on it.
      : _selectorParsing = false,
        super(source, skipWhitespace, index) {
    tmplTokens = new TokenKind();
  }

  /** Position at which the most recently started token begins. */
  int get startIndex => _startIndex;

  /** Sets the current scan position to [idx]. */
  void set index(int idx) {
    _index = idx;
  }

  /**
   * Scans and returns the next token.
   *
   * When [inTag] is true, whitespace is handed to finishWhitespace() and an
   * `=` followed by a quote or an attribute-value character is scanned as an
   * attribute value. When [inTag] is false, a whitespace character is
   * returned as a WHITESPACE token and `=` is always an EQUAL token.
   *
   * Characters that start no known token produce an ERROR token.
   */
  Token next([bool inTag = true]) {
    // Keep track of our starting position.
    _startIndex = _index;

    if (_interpStack != null && _interpStack.depth == 0) {
      // TODO(terry): Enable for variable and string interpolation. The entry
      // being popped should be kept so the string body can be resumed with
      // finishMultilineStringBody(entry.quote) when entry.isMultiline, or
      // finishStringBody(entry.quote) otherwise.
      _interpStack = _interpStack.pop();
    }

    final tokens = tmplTokens.tokens;
    final int ch = _nextChar();

    // The table entries are run-time values, so they cannot be used as
    // `case` labels (which must be compile-time constants). The chain below
    // tests them in the same order the original switch listed them.
    if (ch == 0) {
      return _finishToken(TokenKind.END_OF_FILE);
    }
    if (ch == tokens[TokenKind.SPACE] ||
        ch == tokens[TokenKind.TAB] ||
        ch == tokens[TokenKind.NEWLINE] ||
        ch == tokens[TokenKind.RETURN]) {
      if (inTag) {
        return finishWhitespace();
      } else {
        return _finishToken(TokenKind.WHITESPACE);
      }
    }
    if (ch == tokens[TokenKind.END_OF_FILE]) {
      return _finishToken(TokenKind.END_OF_FILE);
    }
    if (ch == tokens[TokenKind.LPAREN]) {
      return _finishToken(TokenKind.LPAREN);
    }
    if (ch == tokens[TokenKind.RPAREN]) {
      return _finishToken(TokenKind.RPAREN);
    }
    if (ch == tokens[TokenKind.COMMA]) {
      return _finishToken(TokenKind.COMMA);
    }
    if (ch == tokens[TokenKind.LBRACE]) {
      return _finishToken(TokenKind.LBRACE);
    }
    if (ch == tokens[TokenKind.RBRACE]) {
      return _finishToken(TokenKind.RBRACE);
    }
    if (ch == tokens[TokenKind.LESS_THAN]) {
      return _finishToken(TokenKind.LESS_THAN);
    }
    if (ch == tokens[TokenKind.GREATER_THAN]) {
      return _finishToken(TokenKind.GREATER_THAN);
    }
    if (ch == tokens[TokenKind.EQUAL]) {
      if (inTag) {
        if (_maybeEatChar(tokens[TokenKind.SINGLE_QUOTE])) {
          return finishQuotedAttrValue(tokens[TokenKind.SINGLE_QUOTE]);
        } else if (_maybeEatChar(tokens[TokenKind.DOUBLE_QUOTE])) {
          return finishQuotedAttrValue(tokens[TokenKind.DOUBLE_QUOTE]);
        } else if (TokenizerHelpers.isAttributeValueStart(_peekChar())) {
          return finishAttrValue();
        }
      }
      // Outside a tag, or followed by whitespace, `/` or `>`: plain `=`.
      return _finishToken(TokenKind.EQUAL);
    }
    if (ch == tokens[TokenKind.SLASH]) {
      if (_maybeEatChar(tokens[TokenKind.GREATER_THAN])) {
        return _finishToken(TokenKind.END_NO_SCOPE_TAG); // />
      } else if (_maybeEatChar(tokens[TokenKind.ASTERISK])) {
        return finishMultiLineComment(); // /*
      } else {
        return _finishToken(TokenKind.SLASH);
      }
    }
    if (ch == tokens[TokenKind.DOLLAR]) {
      if (_maybeEatChar(tokens[TokenKind.LBRACE])) {
        if (_maybeEatChar(tokens[TokenKind.HASH])) {
          return _finishToken(TokenKind.START_COMMAND); // ${#
        } else if (_maybeEatChar(tokens[TokenKind.SLASH])) {
          return _finishToken(TokenKind.END_COMMAND); // ${/
        } else {
          return _finishToken(TokenKind.START_EXPRESSION); // ${
        }
      } else {
        return _finishToken(TokenKind.DOLLAR);
      }
    }

    if (TokenizerHelpers.isIdentifierStart(ch)) {
      return this.finishIdentifier();
    } else if (TokenizerHelpers.isDigit(ch)) {
      return this.finishNumber();
    } else {
      return _errorToken();
    }
  }

  // TODO(jmesserly): we need a way to emit human readable error messages from
  // the tokenizer.
  /**
   * Finishes the current token as an ERROR token. [message] is accepted but
   * currently ignored.
   */
  Token _errorToken([String message = null]) {
    return _finishToken(TokenKind.ERROR);
  }

  /**
   * Classifies the text between the token start and the current position.
   *
   * Returns the id reported by TokenKind.matchElements, or failing that by
   * TokenKind.matchKeywords; when neither yields a non-negative id the
   * result is TokenKind.IDENTIFIER.
   */
  int getIdentifierKind() {
    final int length = _index - _startIndex;

    // Is the identifier an element?
    int tokId = TokenKind.matchElements(_text, _startIndex, length);

    // Matching against attribute names (TokenKind.matchAttributes) is
    // intentionally disabled here.
    if (tokId == -1) {
      tokId = TokenKind.matchKeywords(_text, _startIndex, length);
    }

    return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
  }

  /**
   * Consumes the rest of an identifier and returns its token.
   *
   * Needs to be overridden so that this file's
   * TokenizerHelpers.isIdentifierPart decides which characters belong to an
   * identifier.
   */
  Token finishIdentifier() {
    // Stop at the first character that is not part of an identifier, leaving
    // that character unconsumed.
    while (_index < _text.length &&
        TokenizerHelpers.isIdentifierPart(_text.codeUnitAt(_index))) {
      _index += 1;
    }
    if (_interpStack != null && _interpStack.depth == -1) {
      _interpStack.depth = 0;
    }
    // getIdentifierKind() already falls back to TokenKind.IDENTIFIER.
    return _finishToken(getIdentifierKind());
  }

  /**
   * Builds an ATTR_VALUE literal token spanning the token start to the
   * current position, carrying the string made from the char codes in [buf].
   */
  Token _makeAttributeValueToken(List<int> buf) {
    final s = new String.fromCharCodes(buf);
    return new LiteralToken(TokenKind.ATTR_VALUE, _source, _startIndex, _index,
        s);
  }

  /**
   * Reads a quoted attribute value up to the closing [quote] character, which
   * is consumed but not included in the value.
   *
   * Returns an ERROR token if end of input (char code 0) is reached before
   * [quote] is seen. The default [quote] of -1 is never compared equal to a
   * scanned character here, so this method does not read an unquoted value;
   * use [finishAttrValue] for that.
   */
  Token finishQuotedAttrValue([int quote = -1]) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeAttributeValueToken(buf);
      } else if (ch == 0) {
        return _errorToken();
      } else {
        buf.add(ch);
      }
    }
  }

  /**
   * Reads an unquoted attribute value. The value ends before the first
   * whitespace, `/` or `>` character, which is left unconsumed.
   *
   * Returns an ERROR token if end of input (char code 0) is reached first.
   */
  Token finishAttrValue() {
    var buf = <int>[];
    while (true) {
      int ch = _peekChar();
      if (TokenizerHelpers.isWhitespace(ch) || TokenizerHelpers.isSlash(ch) ||
          TokenizerHelpers.isCloseTag(ch)) {
        return _makeAttributeValueToken(buf);
      } else if (ch == 0) {
        return _errorToken();
      } else {
        buf.add(_nextChar());
      }
    }
  }

  /**
   * Consumes the rest of a number. Returns a DOUBLE token when the digits are
   * followed by `.` and at least one more digit, otherwise an INTEGER token.
   */
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46/*.*/) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return _finishToken(TokenKind.DOUBLE);
      } else {
        // The dot is not part of the number; un-consume it.
        _index -= 1;
      }
    }

    return _finishToken(TokenKind.INTEGER);
  }

  /** Consumes one decimal digit if present; returns whether it did. */
  bool maybeEatDigit() {
    if (_index < _text.length &&
        TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Consumes a (possibly empty) run of hexadecimal digits. */
  void eatHexDigits() {
    while (_index < _text.length &&
        TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
      _index += 1;
    }
  }

  /** Consumes one hexadecimal digit if present; returns whether it did. */
  bool maybeEatHexDigit() {
    if (_index < _text.length &&
        TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /**
   * Consumes a comment body up to and including its terminator, which is
   * either `*` followed by `/`, or two or more `-` followed by `>`.
   *
   * Returns INCOMPLETE_COMMENT if end of input (char code 0) is reached
   * first. Otherwise, when _skipWhitespace is set the comment is dropped and
   * the result of next() (called with its default argument) is returned;
   * when it is not set, a COMMENT or HTML_COMMENT token is returned.
   */
  Token finishMultiLineComment() {
    final minus = tmplTokens.tokens[TokenKind.MINUS];
    final greaterThan = tmplTokens.tokens[TokenKind.GREATER_THAN];

    // Every path out of this loop is a return, so nothing follows it.
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _finishToken(TokenKind.INCOMPLETE_COMMENT);
      } else if (ch == 42/*'*'*/) {
        if (_maybeEatChar(47/*'/'*/)) {
          if (_skipWhitespace) {
            return next();
          } else {
            return _finishToken(TokenKind.COMMENT);
          }
        }
      } else if (ch == minus) {
        /* Check if close part of Comment Definition --> (CDC). */
        if (_maybeEatChar(minus)) {
          // Swallow any further dashes so that a run such as `--->` is still
          // recognized as a terminator instead of being skipped over.
          while (_maybeEatChar(minus)) {}
          if (_maybeEatChar(greaterThan)) {
            if (_skipWhitespace) {
              return next();
            } else {
              return _finishToken(TokenKind.HTML_COMMENT);
            }
          }
        }
      }
    }
  }
}
| 268 | |
| 269 | |
/**
 * Character-classification predicates used while tokenizing. Each method
 * takes a character code and tests it against fixed ASCII code points.
 */
class TokenizerHelpers {
  /** True for `a`-`z`, `A`-`Z` and `_`. */
  static bool isIdentifierStart(int c) {
    if (c >= 97/*a*/ && c <= 122/*z*/) return true;
    if (c >= 65/*A*/ && c <= 90/*Z*/) return true;
    return c == 95/*_*/;
  }

  /** True for `0`-`9`. */
  static bool isDigit(int c) => c >= 48/*0*/ && c <= 57/*9*/;

  /** True for a decimal digit or a letter in `a`-`f` / `A`-`F`. */
  static bool isHexDigit(int c) {
    if (isDigit(c)) return true;
    if (c >= 97/*a*/ && c <= 102/*f*/) return true;
    return c >= 65/*A*/ && c <= 70/*F*/;
  }

  /** True for space, tab, line feed and carriage return. */
  static bool isWhitespace(int c) {
    switch (c) {
      case 32/*' '*/:
      case 9/*'\t'*/:
      case 10/*'\n'*/:
      case 13/*'\r'*/:
        return true;
      default:
        return false;
    }
  }

  /**
   * True for any identifier-start character, any digit, and the characters
   * `-`, `:` and `.`.
   */
  static bool isIdentifierPart(int c) {
    if (isIdentifierStart(c) || isDigit(c)) return true;
    return c == 45/*-*/ || c == 58/*:*/ || c == 46/*.*/;
  }

  /** True for any identifier-start character or digit. */
  static bool isInterpIdentifierPart(int c) {
    if (isIdentifierStart(c)) return true;
    return isDigit(c);
  }

  /** True unless [c] is whitespace, `/` or `>`. */
  static bool isAttributeValueStart(int c) =>
      !(isWhitespace(c) || isSlash(c) || isCloseTag(c));

  /** True for `/`. */
  static bool isSlash(int c) => c == 47/* / */;

  /** True for `>`. */
  static bool isCloseTag(int c) => c == 62/* > */;
}
| OLD | NEW |