OLD | NEW |
| (Empty) |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 library polymer_expressions.tokenizer; | |
6 | |
// Character codes (Unicode code points) that the tokenizer compares against
// the runes it reads from the input. Names starting with an underscore are
// private to this library.

// Control and whitespace characters.
const int _TAB = 9;
const int _LF = 10;
const int _VTAB = 11;
const int _FF = 12;
const int _CR = 13;
const int _SPACE = 32;

// Punctuation and operator characters.
const int _BANG = 33;
const int _DQ = 34;
const int _$ = 36;
const int _PERCENT = 37;
const int _AMPERSAND = 38;
const int _SQ = 39;
const int _OPEN_PAREN = 40;
const int _CLOSE_PAREN = 41;
const int _STAR = 42;
const int _PLUS = 43;
const int _COMMA = 44;
const int _MINUS = 45;
const int _PERIOD = 46;
const int _SLASH = 47;

// Bounds of the decimal digit range '0'..'9'.
const int _0 = 48;
const int _9 = 57;

const int _COLON = 58;
const int _LT = 60;
const int _EQ = 61;
const int _GT = 62;
const int _QUESTION = 63;

// Bounds of the upper-case letter range 'A'..'Z'.
const int _A = 65;
const int _Z = 90;

const int _OPEN_SQUARE_BRACKET = 91;
const int _BACKSLASH = 92;
const int _CLOSE_SQUARE_BRACKET = 93;
const int _CARET = 94;
const int _US = 95;

// Lower-case letters: _a and _z bound the range 'a'..'z'; _f, _n, _r, _t and
// _v are the letters recognized after a backslash by escape().
const int _a = 97;
const int _f = 102;
const int _n = 110;
const int _r = 114;
const int _t = 116;
const int _v = 118;
const int _z = 122;

const int _OPEN_CURLY_BRACKET = 123;
const int _BAR = 124;
const int _CLOSE_CURLY_BRACKET = 125;

// Non-breaking space (U+00A0); treated as whitespace by isWhitespace().
const int _NBSP = 160;
52 | |
// Character codes that start an operator token; tested by isOperator().
const _OPERATORS = const [_PLUS, _MINUS, _STAR, _SLASH, _BANG, _AMPERSAND,
    _PERCENT, _LT, _EQ, _GT, _QUESTION, _CARET, _BAR];

// Character codes emitted as single-character grouper tokens; tested by
// isGrouper().
const _GROUPERS = const [_OPEN_PAREN, _CLOSE_PAREN,
    _OPEN_SQUARE_BRACKET, _CLOSE_SQUARE_BRACKET,
    _OPEN_CURLY_BRACKET, _CLOSE_CURLY_BRACKET];

// Operators made of two characters. The three-character forms '===' and
// '!==' are not listed here; Tokenizer.tokenizeOperator() builds them by
// checking for a trailing '=' after '==' or '!='.
const _TWO_CHAR_OPS = const ['==', '!=', '<=', '>=', '||', '&&'];

// Identifier spellings that are emitted as KEYWORD_TOKEN rather than
// IDENTIFIER_TOKEN.
const KEYWORDS = const ['as', 'in', 'this'];
63 | |
// Maps the text of an operator or grouper token to the precedence stored on
// the Token. Within this file it is looked up only for operator and grouper
// tokens; comma and colon tokens are created without an explicit precedence.
// NOTE(review): presumably a larger number binds tighter (multiplicative is
// above additive) - confirm against the parser that consumes these values.
// A lone '=' is accepted by isOperator() but has no entry here, so its token
// is created with a null precedence.
const _PRECEDENCE = const {
  '!':  0,
  ':':  0,
  ',':  0,
  ')':  0,
  ']':  0,
  '}':  0, // ?
  '?':  1,
  '||': 2,
  '&&': 3,
  '|':  4,
  '^':  5,
  '&':  6,

  // equality
  '!=': 7,
  '==': 7,
  '!==': 7,
  '===': 7,

  // relational
  '>=': 8,
  '>':  8,
  '<=': 8,
  '<':  8,

  // additive
  '+':  9,
  '-':  9,

  // multiplicative
  '%':  10,
  '/':  10,
  '*':  10,

  // postfix
  '(':  11,
  '[':  11,
  '.':  11,
  '{': 11, //not sure this is correct
};

// Precedence given to DOT tokens; equal to the "postfix" entries of
// _PRECEDENCE above.
const POSTFIX_PRECEDENCE = 11;
107 | |
// Values for Token.kind, identifying what sort of token was read.
const int STRING_TOKEN = 1;      // quoted string literal, quotes removed
const int IDENTIFIER_TOKEN = 2;  // identifier that is not in KEYWORDS
const int DOT_TOKEN = 3;         // '.'
const int COMMA_TOKEN = 4;       // ','
const int COLON_TOKEN = 5;       // ':'
const int INTEGER_TOKEN = 6;     // run of decimal digits
const int DECIMAL_TOKEN = 7;     // digits containing a '.' followed by digits
const int OPERATOR_TOKEN = 8;    // one of the operator spellings
const int GROUPER_TOKEN = 9;     // parenthesis, square bracket or curly brace
const int KEYWORD_TOKEN = 10;    // identifier listed in KEYWORDS
118 | |
/// Returns true when [next] is a space, a tab or a non-breaking space.
///
/// Line breaks and other control characters are not considered whitespace
/// by this predicate.
bool isWhitespace(int next) {
  if (next == _SPACE) return true;
  if (next == _TAB) return true;
  return next == _NBSP;
}
120 | |
/// Returns true when [next] may begin an identifier or keyword: an ASCII
/// letter, an underscore, a dollar sign, or any code point above 127.
bool isIdentifierOrKeywordStart(int next) {
  // Lower-case ASCII letter.
  if (_a <= next && next <= _z) return true;
  // Upper-case ASCII letter.
  if (_A <= next && next <= _Z) return true;
  // '_', '$', or anything outside the ASCII range.
  return next == _US || next == _$ || next > 127;
}
123 | |
/// Returns true when [next] may appear inside an identifier: an ASCII letter
/// or digit, an underscore, a dollar sign, or any code point above 127.
bool isIdentifier(int next) {
  // Lower-case ASCII letter.
  if (_a <= next && next <= _z) return true;
  // Upper-case ASCII letter.
  if (_A <= next && next <= _Z) return true;
  // Decimal digit.
  if (_0 <= next && next <= _9) return true;
  // '_', '$', or anything outside the ASCII range.
  return next == _US || next == _$ || next > 127;
}
127 | |
/// Returns true when [next] is a double or single quote character.
bool isQuote(int next) {
  if (next == _DQ) return true;
  return next == _SQ;
}
129 | |
/// Returns true when [next] is the code of a decimal digit '0'..'9'.
///
/// A null [next] (used by the tokenizer to mean "end of input") yields false
/// instead of failing inside the numeric comparison, matching how isQuote,
/// isOperator and isGrouper already behave for null.
bool isNumber(int next) => next != null && _0 <= next && next <= _9;
131 | |
/// Returns true when [next] is one of the character codes in _OPERATORS.
bool isOperator(int next) {
  return _OPERATORS.contains(next);
}
133 | |
/// Returns true when [next] is one of the character codes in _GROUPERS.
bool isGrouper(int next) {
  return _GROUPERS.contains(next);
}
135 | |
/// Translates the character that follows a backslash in a string literal.
///
/// The letters f, n, r, t and v map to form feed, line feed, carriage
/// return, tab and vertical tab respectively; every other value of [c] is
/// returned unchanged.
int escape(int c) {
  if (c == _f) return _FF;
  if (c == _n) return _LF;
  if (c == _r) return _CR;
  if (c == _t) return _TAB;
  if (c == _v) return _VTAB;
  return c;
}
146 | |
/// A single lexical token: its kind, its text and its precedence.
class Token {
  /// What sort of token this is.
  final int kind;

  /// The token's text.
  final String value;

  /// Precedence associated with the token; 0 when the creator omits it.
  final int precedence;

  Token(int kind, String value, [int precedence = 0])
      : kind = kind,
        value = value,
        precedence = precedence;

  /// Renders the token as `(kind, 'value')`; the precedence is not shown.
  String toString() {
    return "(${kind}, '${value}')";
  }
}
156 | |
/// Splits an expression string into a flat list of [Token]s.
///
/// The input is consumed one rune at a time. [_next] always holds the rune
/// currently being examined, or null once the input is exhausted. Text for
/// multi-character tokens is accumulated in [_sb] and cleared as soon as the
/// token has been emitted.
class Tokenizer {
  final List<Token> _tokens = <Token>[];
  final StringBuffer _sb = new StringBuffer();
  final RuneIterator _iterator;

  /// Current rune, or null at end of input.
  int _next;

  Tokenizer(String input) : _iterator = new RuneIterator(input);

  /// Moves to the following rune, setting [_next] to null at end of input.
  _advance() {
    _next = _iterator.moveNext() ? _iterator.current : null;
  }

  /// Tokenizes the whole input and returns the accumulated token list.
  ///
  /// Whitespace is skipped. Characters that do not start any known token are
  /// skipped silently as well. Throws a [ParseException] when a string
  /// literal is not terminated.
  List<Token> tokenize() {
    _advance();
    while (_next != null) {
      if (isWhitespace(_next)) {
        _advance();
      } else if (isQuote(_next)) {
        tokenizeString();
      } else if (isIdentifierOrKeywordStart(_next)) {
        tokenizeIdentifierOrKeyword();
      } else if (isNumber(_next)) {
        tokenizeNumber();
      } else if (_next == _PERIOD) {
        tokenizeDot();
      } else if (_next == _COMMA) {
        tokenizeComma();
      } else if (_next == _COLON) {
        tokenizeColon();
      } else if (isOperator(_next)) {
        tokenizeOperator();
      } else if (isGrouper(_next)) {
        tokenizeGrouper();
      } else {
        // Unrecognized character: drop it and carry on.
        _advance();
      }
    }
    return _tokens;
  }

  /// Reads a quoted string whose opening quote is the current rune and emits
  /// a STRING_TOKEN holding its contents without the quotes.
  ///
  /// A backslash makes the following character go through [escape]. Throws a
  /// [ParseException] if the input ends before the closing quote.
  tokenizeString() {
    int quoteChar = _next;
    _advance();
    while (_next != quoteChar) {
      if (_next == null) throw new ParseException("unterminated string");
      if (_next == _BACKSLASH) {
        _advance();
        if (_next == null) throw new ParseException("unterminated string");
        _sb.writeCharCode(escape(_next));
      } else {
        _sb.writeCharCode(_next);
      }
      _advance();
    }
    _tokens.add(new Token(STRING_TOKEN, _sb.toString()));
    _sb.clear();
    // Step over the closing quote.
    _advance();
  }

  /// Reads a run of identifier characters and emits either a KEYWORD_TOKEN
  /// (when the text is listed in KEYWORDS) or an IDENTIFIER_TOKEN.
  tokenizeIdentifierOrKeyword() {
    while (_next != null && isIdentifier(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    var value = _sb.toString();
    if (KEYWORDS.contains(value)) {
      _tokens.add(new Token(KEYWORD_TOKEN, value));
    } else {
      _tokens.add(new Token(IDENTIFIER_TOKEN, value));
    }
    _sb.clear();
  }

  /// Reads a number starting at the current digit.
  ///
  /// Emits a DECIMAL_TOKEN when the digits are followed by '.' and at least
  /// one more digit. Otherwise emits an INTEGER_TOKEN, followed by a
  /// DOT_TOKEN when a '.' came directly after the digits.
  tokenizeNumber() {
    while (_next != null && isNumber(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    if (_next != _PERIOD) {
      _tokens.add(new Token(INTEGER_TOKEN, _sb.toString()));
      _sb.clear();
      return;
    }
    // Consume the '.' and decide from what follows it.
    _advance();
    if (_next != null && isNumber(_next)) {
      // The integer digits are still in _sb; tokenizeFraction appends the
      // '.' and the fractional digits and emits the whole decimal.
      tokenizeFraction();
    } else {
      // The '.' does not continue the number (for example "1.foo"), so the
      // integer must be emitted and the buffer cleared before the dot token.
      // Otherwise the digits would be lost and would leak into the next
      // buffered token.
      _tokens.add(new Token(INTEGER_TOKEN, _sb.toString()));
      _sb.clear();
      _tokens.add(new Token(DOT_TOKEN, '.', POSTFIX_PRECEDENCE));
    }
  }

  /// Handles a '.' that does not follow digits: emits a DECIMAL_TOKEN when a
  /// digit comes next (".5"), otherwise a DOT_TOKEN.
  tokenizeDot() {
    _advance();
    // Check for end of input explicitly so a trailing '.' yields a DOT_TOKEN
    // rather than a failure while comparing null against digit bounds.
    if (_next != null && isNumber(_next)) {
      tokenizeFraction();
    } else {
      _tokens.add(new Token(DOT_TOKEN, '.', POSTFIX_PRECEDENCE));
    }
  }

  /// Consumes the current ',' and emits a COMMA_TOKEN.
  tokenizeComma() {
    _advance();
    _tokens.add(new Token(COMMA_TOKEN, ','));
  }

  /// Consumes the current ':' and emits a COLON_TOKEN.
  tokenizeColon() {
    _advance();
    _tokens.add(new Token(COLON_TOKEN, ':'));
  }

  /// Appends '.' and the following digits to whatever is already buffered in
  /// [_sb] and emits the result as a DECIMAL_TOKEN.
  ///
  /// Expects the '.' to have been consumed already and the current rune to
  /// be the first fractional digit.
  tokenizeFraction() {
    _sb.writeCharCode(_PERIOD);
    while (_next != null && isNumber(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    _tokens.add(new Token(DECIMAL_TOKEN, _sb.toString()));
    _sb.clear();
  }

  /// Reads a one-, two- or three-character operator and emits an
  /// OPERATOR_TOKEN with the precedence found in _PRECEDENCE.
  ///
  /// NOTE: a lone '=' is accepted here but has no _PRECEDENCE entry, so its
  /// token carries a null precedence.
  tokenizeOperator() {
    int startChar = _next;
    _advance();
    var op;
    // check for 2 character operators
    if (isOperator(_next)) {
      var op2 = new String.fromCharCodes([startChar, _next]);
      if (_TWO_CHAR_OPS.contains(op2)) {
        op = op2;
        _advance();
        // kind of hacky check for === and !==, could be better / more general
        if (_next == _EQ && (startChar == _BANG || startChar == _EQ)) {
          op = op2 + '=';
          _advance();
        }
      } else {
        // Two operator characters in a row that do not combine: emit only
        // the first; the second is handled on the next loop iteration.
        op = new String.fromCharCode(startChar);
      }
    } else {
      op = new String.fromCharCode(startChar);
    }
    _tokens.add(new Token(OPERATOR_TOKEN, op, _PRECEDENCE[op]));
  }

  /// Emits the current bracket character as a GROUPER_TOKEN with the
  /// precedence found in _PRECEDENCE, then moves past it.
  tokenizeGrouper() {
    var value = new String.fromCharCode(_next);
    _tokens.add(new Token(GROUPER_TOKEN, value, _PRECEDENCE[value]));
    _advance();
  }
}
303 | |
/// Error raised by the tokenizer when the input cannot be tokenized.
class ParseException implements Exception {
  /// Human-readable description of the failure.
  final String message;

  ParseException(String message) : message = message;

  /// Returns the message prefixed with `ParseException: `.
  String toString() {
    return "ParseException: ${message}";
  }
}
OLD | NEW |