OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 library polymer_expressions.tokenizer; |
| 6 |
// Code points used by the tokenizer, listed in ascending order.

// Control characters.
const int _TAB = 9;
const int _LF = 10;
const int _VTAB = 11;
const int _FF = 12;
const int _CR = 13;

// Space, punctuation and operator characters below the digits.
const int _SPACE = 32;
const int _BANG = 33;
const int _DQ = 34;
const int _$ = 36;
const int _PERCENT = 37;
const int _AMPERSAND = 38;
const int _SQ = 39;
const int _OPEN_PAREN = 40;
const int _CLOSE_PAREN = 41;
const int _STAR = 42;
const int _PLUS = 43;
const int _COMMA = 44;
const int _MINUS = 45;
const int _PERIOD = 46;
const int _SLASH = 47;

// Bounds of the decimal digit range.
const int _0 = 48;
const int _9 = 57;

// Punctuation between the digits and the upper-case letters.
const int _COLON = 58;
const int _LT = 60;
const int _EQ = 61;
const int _GT = 62;
const int _QUESTION = 63;

// Bounds of the upper-case letter range.
const int _A = 65;
const int _Z = 90;

// Punctuation between the upper-case and lower-case letters.
const int _OPEN_SQUARE_BRACKET = 91;
const int _BACKSLASH = 92;
const int _CLOSE_SQUARE_BRACKET = 93;
const int _CARET = 94;
const int _US = 95;

// Lower-case letters: the range bounds (_a, _z) plus the letters that are
// recognised after a backslash in string literals.
const int _a = 97;
const int _f = 102;
const int _n = 110;
const int _r = 114;
const int _t = 116;
const int _v = 118;
const int _z = 122;

// Punctuation above the lower-case letters.
const int _OPEN_CURLY_BRACKET = 123;
const int _BAR = 124;
const int _CLOSE_CURLY_BRACKET = 125;

// Non-breaking space.
const int _NBSP = 160;
| 52 |
/// Code points that can start (or entirely form) an operator token.
const _OPERATORS = const [
  _PLUS,
  _MINUS,
  _STAR,
  _SLASH,
  _BANG,
  _AMPERSAND,
  _PERCENT,
  _LT,
  _EQ,
  _GT,
  _QUESTION,
  _CARET,
  _BAR
];

/// Code points of the opening and closing bracket characters.
const _GROUPERS = const [
  _OPEN_PAREN,
  _CLOSE_PAREN,
  _OPEN_SQUARE_BRACKET,
  _CLOSE_SQUARE_BRACKET,
  _OPEN_CURLY_BRACKET,
  _CLOSE_CURLY_BRACKET
];

/// Operators made of exactly two characters.
const _TWO_CHAR_OPS = const [
  '==',
  '!=',
  '<=',
  '>=',
  '||',
  '&&'
];

/// Identifiers that are tokenized as keywords rather than plain identifiers.
const KEYWORDS = const [
  'as',
  'in',
  'this'
];
| 63 |
/// Precedence assigned to operator and grouper tokens, keyed by token text.
const _PRECEDENCE = const {
  // unary not, separators and closing groupers
  '!': 0,
  ':': 0,
  ',': 0,
  ')': 0,
  ']': 0,
  '}': 0, // TODO: confirm that 0 is the right precedence for '}'.

  // ternary
  '?': 1,

  // logical
  '||': 2,
  '&&': 3,

  // single-character |, ^ and &
  '|': 4,
  '^': 5,
  '&': 6,

  // equality
  '!=': 7,
  '==': 7,
  '!==': 7,
  '===': 7,

  // relational
  '>=': 8,
  '>': 8,
  '<=': 8,
  '<': 8,

  // additive
  '+': 9,
  '-': 9,

  // multiplicative
  '%': 10,
  '/': 10,
  '*': 10,

  // postfix
  '(': POSTFIX_PRECEDENCE,
  '[': POSTFIX_PRECEDENCE,
  '.': POSTFIX_PRECEDENCE,
  '{': POSTFIX_PRECEDENCE, // TODO: confirm that '{' belongs with the postfix operators.
};

/// Precedence shared by the postfix entries of [_PRECEDENCE].
const POSTFIX_PRECEDENCE = 11;
| 107 |
// Values stored in a token's `kind` field.

/// A quoted string literal.
const int STRING_TOKEN = 1;

/// An identifier that is not listed in `KEYWORDS`.
const int IDENTIFIER_TOKEN = 2;

/// A `.` that is not part of a decimal number.
const int DOT_TOKEN = 3;

/// A `,`.
const int COMMA_TOKEN = 4;

/// A `:`.
const int COLON_TOKEN = 5;

/// A run of digits with no fractional part.
const int INTEGER_TOKEN = 6;

/// A number that contains a `.` followed by digits.
const int DECIMAL_TOKEN = 7;

/// An operator of one to three characters.
const int OPERATOR_TOKEN = 8;

/// A single bracket character.
const int GROUPER_TOKEN = 9;

/// An identifier that is listed in `KEYWORDS`.
const int KEYWORD_TOKEN = 10;
| 118 |
/// Returns true when [next] is a space, a horizontal tab or a non-breaking
/// space. Line breaks are not treated as whitespace by this predicate.
bool isWhitespace(int next) {
  return next == _SPACE || next == _TAB || next == _NBSP;
}
| 120 |
/// Returns true when [next] may begin an identifier or keyword: an ASCII
/// letter, `_`, `$`, or any code point above 127.
bool isIdentifierOrKeywordStart(int next) {
  if (_a <= next && next <= _z) return true;
  if (_A <= next && next <= _Z) return true;
  return next == _US || next == _$ || next > 127;
}
| 123 |
/// Returns true when [next] may appear inside an identifier: an ASCII letter
/// or digit, `_`, `$`, or any code point above 127.
bool isIdentifier(int next) {
  if (_a <= next && next <= _z) return true;
  if (_A <= next && next <= _Z) return true;
  if (_0 <= next && next <= _9) return true;
  return next == _US || next == _$ || next > 127;
}
| 127 |
/// Returns true when [next] is a double or a single quote character.
bool isQuote(int next) {
  return next == _DQ || next == _SQ;
}
| 129 |
/// Returns true when [next] is one of the ASCII digits `0` to `9`.
bool isNumber(int next) {
  return _0 <= next && next <= _9;
}
| 131 |
/// Returns true when [next] is one of the code points in [_OPERATORS].
bool isOperator(int next) {
  return _OPERATORS.contains(next);
}
| 133 |
/// Returns true when [next] is one of the code points in [_GROUPERS].
bool isGrouper(int next) {
  return _GROUPERS.contains(next);
}
| 135 |
/// Translates the character that follows a backslash into the character it
/// stands for.
///
/// `f`, `n`, `r`, `t` and `v` map to form feed, line feed, carriage return,
/// tab and vertical tab respectively. Any other [c] is returned unchanged.
int escape(int c) {
  if (c == _f) return _FF;
  if (c == _n) return _LF;
  if (c == _r) return _CR;
  if (c == _t) return _TAB;
  if (c == _v) return _VTAB;
  return c;
}
| 146 |
/// A single lexical unit: its [kind], its text [value] and its [precedence].
class Token {
  /// Integer tag identifying the category of this token.
  final int kind;

  /// Text of the token.
  final String value;

  /// Precedence of the token; 0 when the constructor argument is omitted.
  final int precedence;

  Token(this.kind, this.value, [this.precedence = 0]);

  /// Renders the token as `(kind, 'value')`; the precedence is not included.
  String toString() {
    return "(${kind}, '${value}')";
  }
}
| 156 |
/// Converts an expression string into a list of [Token]s.
///
/// The input is consumed one rune at a time. Characters accepted by
/// [isWhitespace], and any character that does not start a recognised token,
/// are skipped without producing a token.
class Tokenizer {
  /// Tokens produced so far; [tokenize] returns this same list.
  final List<Token> _tokens = <Token>[];

  /// Scratch buffer holding the text of the token currently being assembled.
  final StringBuffer _sb = new StringBuffer();

  final RuneIterator _iterator;

  /// The rune under the cursor, or `null` once the input is exhausted.
  int _next;

  Tokenizer(String input) : _iterator = new RuneIterator(input);

  /// Moves the cursor one rune forward; sets [_next] to `null` at the end.
  _advance() {
    _next = _iterator.moveNext() ? _iterator.current : null;
  }

  /// Scans the whole input and returns the resulting tokens.
  ///
  /// Throws a [ParseException] when a string literal is not terminated.
  List<Token> tokenize() {
    _advance();
    while (_next != null) {
      if (isWhitespace(_next)) {
        _advance();
      } else if (isQuote(_next)) {
        tokenizeString();
      } else if (isIdentifierOrKeywordStart(_next)) {
        tokenizeIdentifierOrKeyword();
      } else if (isNumber(_next)) {
        tokenizeNumber();
      } else if (_next == _PERIOD) {
        tokenizeDot();
      } else if (_next == _COMMA) {
        tokenizeComma();
      } else if (_next == _COLON) {
        tokenizeColon();
      } else if (isOperator(_next)) {
        tokenizeOperator();
      } else if (isGrouper(_next)) {
        tokenizeGrouper();
      } else {
        // Unrecognised character (this includes line breaks): skip it.
        _advance();
      }
    }
    return _tokens;
  }

  /// Reads a quoted string whose opening quote is at the cursor.
  ///
  /// The string ends at the next unescaped occurrence of the same quote
  /// character. A backslash causes the following character to be passed
  /// through [escape]. Throws a [ParseException] if the input ends first.
  tokenizeString() {
    int quoteChar = _next;
    _advance();
    while (_next != quoteChar) {
      if (_next == null) throw new ParseException("unterminated string");
      if (_next == _BACKSLASH) {
        _advance();
        if (_next == null) throw new ParseException("unterminated string");
        _sb.writeCharCode(escape(_next));
      } else {
        _sb.writeCharCode(_next);
      }
      _advance();
    }
    _tokens.add(new Token(STRING_TOKEN, _sb.toString()));
    _sb.clear();
    // Step past the closing quote.
    _advance();
  }

  /// Reads a run of identifier characters and emits a keyword token when the
  /// text is listed in [KEYWORDS], an identifier token otherwise.
  tokenizeIdentifierOrKeyword() {
    while (_next != null && isIdentifier(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    var value = _sb.toString();
    if (KEYWORDS.contains(value)) {
      _tokens.add(new Token(KEYWORD_TOKEN, value));
    } else {
      _tokens.add(new Token(IDENTIFIER_TOKEN, value));
    }
    _sb.clear();
  }

  /// Reads a run of digits. When a `.` follows, the digits stay in the buffer
  /// and [tokenizeDot] decides whether they form a decimal or an integer
  /// followed by a dot; otherwise an integer token is emitted here.
  tokenizeNumber() {
    while (_next != null && isNumber(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    if (_next == _PERIOD) {
      tokenizeDot();
    } else {
      _tokens.add(new Token(INTEGER_TOKEN, _sb.toString()));
      _sb.clear();
    }
  }

  /// Handles a `.` at the cursor: either the start of a fraction or a
  /// stand-alone dot token.
  tokenizeDot() {
    _advance();
    // The dot may be the last character of the input, so guard against null
    // before testing for a digit.
    if (_next != null && isNumber(_next)) {
      tokenizeFraction();
    } else {
      // When called from tokenizeNumber the integer digits are still in the
      // buffer. Emit them as their own token so they are neither lost nor
      // glued onto whatever token is scanned next.
      if (_sb.isNotEmpty) {
        _tokens.add(new Token(INTEGER_TOKEN, _sb.toString()));
        _sb.clear();
      }
      _tokens.add(new Token(DOT_TOKEN, '.', POSTFIX_PRECEDENCE));
    }
  }

  /// Consumes a `,` and emits a comma token.
  tokenizeComma() {
    _advance();
    _tokens.add(new Token(COMMA_TOKEN, ','));
  }

  /// Consumes a `:` and emits a colon token.
  tokenizeColon() {
    _advance();
    _tokens.add(new Token(COLON_TOKEN, ':'));
  }

  /// Reads the digits after a `.` (already consumed) and emits a decimal
  /// token made of any buffered integer digits, the dot, and those digits.
  tokenizeFraction() {
    _sb.writeCharCode(_PERIOD);
    while (_next != null && isNumber(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    _tokens.add(new Token(DECIMAL_TOKEN, _sb.toString()));
    _sb.clear();
  }

  /// Reads an operator of one, two or three characters.
  ///
  /// NOTE: a lone `=` is accepted as an operator but has no entry in
  /// [_PRECEDENCE], so its token carries a null precedence.
  tokenizeOperator() {
    int startChar = _next;
    _advance();
    // Start from the single-character operator and widen it if possible.
    var op = new String.fromCharCode(startChar);
    // At end of input _next is null, which is not contained in the operator
    // list, so the check below is simply false.
    if (isOperator(_next)) {
      var op2 = new String.fromCharCodes([startChar, _next]);
      if (_TWO_CHAR_OPS.contains(op2)) {
        op = op2;
        _advance();
        // Special case for the three-character operators `===` and `!==`.
        if (_next == _EQ && (startChar == _BANG || startChar == _EQ)) {
          op = op2 + '=';
          _advance();
        }
      }
    }
    _tokens.add(new Token(OPERATOR_TOKEN, op, _PRECEDENCE[op]));
  }

  /// Emits a grouper token for the bracket character at the cursor.
  tokenizeGrouper() {
    var value = new String.fromCharCode(_next);
    _tokens.add(new Token(GROUPER_TOKEN, value, _PRECEDENCE[value]));
    _advance();
  }
}
| 303 |
/// Exception carrying a human-readable [message] that describes a parse
/// failure.
class ParseException implements Exception {
  /// Description of what went wrong.
  final String message;

  ParseException(this.message);

  /// Renders the exception as `ParseException: ` followed by [message].
  String toString() {
    return "ParseException: ${message}";
  }
}
OLD | NEW |