OLD | NEW |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 library PegParser; | 5 library PegParser; |
6 | 6 |
7 /* | 7 /* |
8 * The following functions are combinators for building Rules. | 8 * The following functions are combinators for building Rules. |
9 * | 9 * |
10 * A rule is one of the following | 10 * A rule is one of the following |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
48 else | 48 else |
49 return new _CharCodeRule(spec, name); | 49 return new _CharCodeRule(spec, name); |
50 } | 50 } |
51 | 51 |
52 /** | 52 /** |
53 * Matches one of the [characters]. | 53 * Matches one of the [characters]. |
54 * | 54 * |
55 * CHAR does not generate a value. | 55 * CHAR does not generate a value. |
56 */ | 56 */ |
57 _Rule CHAR([characters]) { | 57 _Rule CHAR([characters]) { |
58 if (characters == null) | 58 if (characters == null) return const _AnyCharRule(); |
59 return const _AnyCharRule(); | 59 if (characters is int) return CHARCODE(characters); |
60 if (characters is int) | |
61 return CHARCODE(characters); | |
62 | 60 |
63 // Find the range of character codes and construct an array of flags for codes | 61 // Find the range of character codes and construct an array of flags for codes |
64 // within the range. | 62 // within the range. |
65 List<int> codes = characters.codeUnits.toList(); | 63 List<int> codes = characters.codeUnits.toList(); |
66 codes.sort((a, b) => a < b ? -1 : a > b ? 1 : 0); | 64 codes.sort((a, b) => a < b ? -1 : a > b ? 1 : 0); |
67 int lo = codes[0]; | 65 int lo = codes[0]; |
68 int hi = codes[codes.length - 1]; | 66 int hi = codes[codes.length - 1]; |
69 if (lo == hi) | 67 if (lo == hi) return CHARCODE(lo); |
70 return CHARCODE(lo); | |
71 int len = hi - lo + 1; | 68 int len = hi - lo + 1; |
72 var flags = new List<bool>(len); | 69 var flags = new List<bool>(len); |
73 for (int i = 0; i < len; ++i) | 70 for (int i = 0; i < len; ++i) flags[i] = false; |
74 flags[i] = false; | 71 for (int code in codes) flags[code - lo] = true; |
75 for (int code in codes) | |
76 flags[code - lo] = true; | |
77 | 72 |
78 return CHARCODE((code) => code >= lo && code <= hi && flags[code - lo]); | 73 return CHARCODE((code) => code >= lo && code <= hi && flags[code - lo]); |
79 } | 74 } |
80 | 75 |
81 /** | 76 /** |
82 * Matches the end of the input. | 77 * Matches the end of the input. |
83 * | 78 * |
84 * END does not generate a value. | 79 * END does not generate a value. |
85 */ | 80 */ |
86 _Rule get END => new _EndOfInputRule(); | 81 _Rule get END => new _EndOfInputRule(); |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
130 return new _LexicalRule(arg1, _compile(arg2)); | 125 return new _LexicalRule(arg1, _compile(arg2)); |
131 } | 126 } |
132 | 127 |
133 /** | 128 /** |
134 * Matches [rule] and generates a value from the matched text. If the [rule] | 129 * Matches [rule] and generates a value from the matched text. If the [rule] |
135 * matches, then TEXT(rule) matches and has a value derived from the string | 130 * matches, then TEXT(rule) matches and has a value derived from the string |
136 * fragment that was matched. The default derived value is the string fragment. | 131 * fragment that was matched. The default derived value is the string fragment. |
137 * | 132 * |
138 * TEXT always generates a value. | 133 * TEXT always generates a value. |
139 */ | 134 */ |
140 _Rule TEXT(rule, [extractor]) => | 135 _Rule TEXT(rule, [extractor]) => new _TextValueRule( |
141 new _TextValueRule(_compile(rule), | 136 _compile(rule), |
142 extractor == null | 137 extractor == null |
143 ? (string, start, end) => string.substring(start, end) | 138 ? (string, start, end) => string.substring(start, end) |
144 : extractor); | 139 : extractor); |
145 | 140 |
146 /** | 141 /** |
147 * Matches an optional rule. | 142 * Matches an optional rule. |
148 * | 143 * |
149 * MAYBE is a value generating matcher. | 144 * MAYBE is a value generating matcher. |
150 * | 145 * |
151 * If [rule] is value generating then the value is the value generated by [rule] | 146 * If [rule] is value generating then the value is the value generated by [rule] |
152 * if it matches, and [:null:] if it does not. | 147 * if it matches, and [:null:] if it does not. |
153 * | 148 * |
154 * If [rule] is not value generatinge then the value is [:true:] if [rule] | 149 * If [rule] is not value generatinge then the value is [:true:] if [rule] |
(...skipping 23 matching lines...) Expand all Loading... |
178 */ | 173 */ |
179 _Rule MANY0(rule, [separator = null]) { | 174 _Rule MANY0(rule, [separator = null]) { |
180 return new _RepeatRule(_compile(rule), _compileOptional(separator), 0); | 175 return new _RepeatRule(_compile(rule), _compileOptional(separator), 0); |
181 } | 176 } |
182 | 177 |
183 /** | 178 /** |
184 * Matches [rules] in order until one succeeds. | 179 * Matches [rules] in order until one succeeds. |
185 * | 180 * |
186 * OR is value-generating. | 181 * OR is value-generating. |
187 */ | 182 */ |
188 _Rule OR([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]) => | 183 _Rule OR( |
| 184 [a, |
| 185 b, |
| 186 c, |
| 187 d, |
| 188 e, |
| 189 f, |
| 190 g, |
| 191 h, |
| 192 i, |
| 193 j, |
| 194 k, |
| 195 l, |
| 196 m, |
| 197 n, |
| 198 o, |
| 199 p, |
| 200 q, |
| 201 r, |
| 202 s, |
| 203 t, |
| 204 u, |
| 205 v, |
| 206 w, |
| 207 x, |
| 208 y, |
| 209 z]) => |
189 _compileMultiRule( | 210 _compileMultiRule( |
190 (a is List && b == null) // Backward compat. OR([a, b]) => OR(a, b). | 211 (a is List && b == null) // Backward compat. OR([a, b]) => OR(a, b). |
191 ? a | 212 ? a |
192 : _unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z), | 213 : _unspread(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, |
| 214 t, u, v, w, x, y, z), |
193 false, | 215 false, |
194 (compiledRules, valueCount, reducer) => | 216 (compiledRules, valueCount, reducer) => new _ChoiceRule(compiledRules)); |
195 new _ChoiceRule(compiledRules)); | |
196 | 217 |
197 | 218 _Rule SEQ( |
198 | 219 [a, |
199 _Rule SEQ([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]) => | 220 b, |
200 _compile(_unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z)); | 221 c, |
| 222 d, |
| 223 e, |
| 224 f, |
| 225 g, |
| 226 h, |
| 227 i, |
| 228 j, |
| 229 k, |
| 230 l, |
| 231 m, |
| 232 n, |
| 233 o, |
| 234 p, |
| 235 q, |
| 236 r, |
| 237 s, |
| 238 t, |
| 239 u, |
| 240 v, |
| 241 w, |
| 242 x, |
| 243 y, |
| 244 z]) => |
| 245 _compile(_unspread(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, |
| 246 t, u, v, w, x, y, z)); |
201 | 247 |
202 /** | 248 /** |
203 * Matches [rule] | 249 * Matches [rule] |
204 */ | 250 */ |
205 _Rule MEMO(rule) => new _MemoRule(_compile(rule)); | 251 _Rule MEMO(rule) => new _MemoRule(_compile(rule)); |
206 | 252 |
207 _Rule TAG(tag, rule) => _compile([rule, (ast) => [tag, ast]]); | 253 _Rule TAG(tag, rule) => _compile([ |
208 | 254 rule, |
| 255 (ast) => [tag, ast] |
| 256 ]); |
209 | 257 |
210 class ParseError implements Exception { | 258 class ParseError implements Exception { |
211 const ParseError(String this._message); | 259 const ParseError(String this._message); |
212 String toString() => _message; | 260 String toString() => _message; |
213 final String _message; | 261 final String _message; |
214 } | 262 } |
215 | 263 |
216 /** | 264 /** |
217 * A grammar is a collection of symbols and rules that may be used to parse an | 265 * A grammar is a collection of symbols and rules that may be used to parse an |
218 * input. | 266 * input. |
219 */ | 267 */ |
220 class Grammar { | 268 class Grammar { |
221 Map<String, Symbol> _symbols; | 269 Map<String, Symbol> _symbols; |
222 | 270 |
223 /** This rule may be set by the user to define whitespace. */ | 271 /** This rule may be set by the user to define whitespace. */ |
224 _Rule _whitespace; | 272 _Rule _whitespace; |
225 | 273 |
226 _Rule get whitespace => _whitespace; | 274 _Rule get whitespace => _whitespace; |
227 void set whitespace(rule) { _whitespace = _compile(rule); } | 275 void set whitespace(rule) { |
| 276 _whitespace = _compile(rule); |
| 277 } |
228 | 278 |
229 Grammar() { | 279 Grammar() { |
230 _symbols = new Map<String, Symbol>(); | 280 _symbols = new Map<String, Symbol>(); |
231 whitespace = CHAR(' \t\r\n'); | 281 whitespace = CHAR(' \t\r\n'); |
232 } | 282 } |
233 | 283 |
234 /** | 284 /** |
235 * operator [] is used to find or create symbols. Symbols may appear in rules | 285 * operator [] is used to find or create symbols. Symbols may appear in rules |
236 * to define recursive rules. | 286 * to define recursive rules. |
237 */ | 287 */ |
238 Symbol operator [](String name) { | 288 Symbol operator [](String name) { |
239 if (_symbols.containsKey(name)) | 289 if (_symbols.containsKey(name)) return _symbols[name]; |
240 return _symbols[name]; | |
241 Symbol s = new Symbol(name, this); | 290 Symbol s = new Symbol(name, this); |
242 _symbols[name] = s; | 291 _symbols[name] = s; |
243 return s; | 292 return s; |
244 } | 293 } |
245 | 294 |
246 /** | 295 /** |
247 * Parses the input string and returns the parsed AST, or throws an exception | 296 * Parses the input string and returns the parsed AST, or throws an exception |
248 * if the input can't be parsed. | 297 * if the input can't be parsed. |
249 */ | 298 */ |
250 parse(root, String text) { | 299 parse(root, String text) { |
251 for (var symbol in _symbols.values) | 300 for (var symbol in _symbols.values) |
252 if (symbol._rule == null) | 301 if (symbol._rule == null) print('${symbol.name} is undefined'); |
253 print('${symbol.name} is undefined'); | |
254 | 302 |
255 var state = new _ParserState(text, whitespace: whitespace); | 303 var state = new _ParserState(text, whitespace: whitespace); |
256 var match = _compile(root).match(state, 0); | 304 var match = _compile(root).match(state, 0); |
257 if (match == null) | 305 if (match == null) return diagnose(state); |
258 return diagnose(state); | |
259 var pos = match[0]; | 306 var pos = match[0]; |
260 pos = _skip_whitespace(state, pos); | 307 pos = _skip_whitespace(state, pos); |
261 if (pos == state._end) | 308 if (pos == state._end) return match[1]; |
262 return match[1]; | |
263 // TODO: Make this complain about expecting end of file. | 309 // TODO: Make this complain about expecting end of file. |
264 return diagnose(state); | 310 return diagnose(state); |
265 } | 311 } |
266 | 312 |
267 diagnose(state) { | 313 diagnose(state) { |
268 var message = 'unexpected error'; | 314 var message = 'unexpected error'; |
269 if (!state.max_rule.isEmpty) { | 315 if (!state.max_rule.isEmpty) { |
270 var s = new Set(); | 316 var s = new Set(); |
271 for (var rule in state.max_rule) | 317 for (var rule in state.max_rule) s.add(rule.description()); |
272 s.add(rule.description()); | |
273 var tokens = new List<String>.from(s); | 318 var tokens = new List<String>.from(s); |
274 tokens.sort((a, b) => | 319 tokens.sort((a, b) => a.startsWith("'") == b.startsWith("'") |
275 a.startsWith("'") == b.startsWith("'") | 320 ? a.compareTo(b) |
276 ? a.compareTo(b) | 321 : a.startsWith("'") ? 1 : -1); |
277 : a.startsWith("'") ? 1 : -1); | |
278 var expected = tokens.join(' or '); | 322 var expected = tokens.join(' or '); |
279 var found = state.max_pos == state._end ? 'end of file' | 323 var found = state.max_pos == state._end |
| 324 ? 'end of file' |
280 : "'${state._text[state.max_pos]}'"; | 325 : "'${state._text[state.max_pos]}'"; |
281 message = 'Expected $expected but found $found'; | 326 message = 'Expected $expected but found $found'; |
282 } | 327 } |
283 int start = state.max_pos; | 328 int start = state.max_pos; |
284 int end = start; | 329 int end = start; |
285 while (start >= 1 && state._text[start - 1] != '\n') --start; | 330 while (start >= 1 && state._text[start - 1] != '\n') --start; |
286 while (end < state._text.length && state._text[end] != '\n') ++end; | 331 while (end < state._text.length && state._text[end] != '\n') ++end; |
287 var line = state._text.substring(start, end); | 332 var line = state._text.substring(start, end); |
288 var indicator = ''; | 333 var indicator = ''; |
289 for (var i = 0; i < line.length && start + i < state.max_pos; i++) | 334 for (var i = 0; i < line.length && start + i < state.max_pos; i++) |
290 indicator = ' $indicator'; | 335 indicator = ' $indicator'; |
291 indicator = '$indicator^'; | 336 indicator = '$indicator^'; |
292 // TODO: Convert to an exception. | 337 // TODO: Convert to an exception. |
293 print(message); | 338 print(message); |
294 print(line); | 339 print(line); |
295 print(indicator); | 340 print(indicator); |
296 return null; | 341 return null; |
297 } | 342 } |
298 } | 343 } |
299 | 344 |
300 class Symbol { | 345 class Symbol { |
301 final String name; | 346 final String name; |
302 final Grammar grammar; | 347 final Grammar grammar; |
303 _Rule _rule; | 348 _Rule _rule; |
304 | 349 |
305 Symbol(this.name, this.grammar); | 350 Symbol(this.name, this.grammar); |
306 | 351 |
307 void set def(rule) { | 352 void set def(rule) { |
308 assert(_rule == null); // Assign once. | 353 assert(_rule == null); // Assign once. |
309 _rule = _compile(rule); | 354 _rule = _compile(rule); |
310 } | 355 } |
311 | 356 |
312 toString() => _rule == null ? '<$name>' : '<$name = $_rule>'; | 357 toString() => _rule == null ? '<$name>' : '<$name = $_rule>'; |
313 } | 358 } |
314 | 359 |
315 | |
316 class _ParserState { | 360 class _ParserState { |
317 _ParserState(this._text, {_Rule whitespace}) { | 361 _ParserState(this._text, {_Rule whitespace}) { |
318 _end = this._text.length; | 362 _end = this._text.length; |
319 whitespaceRule = whitespace; | 363 whitespaceRule = whitespace; |
320 max_rule = []; | 364 max_rule = []; |
321 } | 365 } |
322 | 366 |
323 String _text; | 367 String _text; |
324 int _end; | 368 int _end; |
325 | 369 |
(...skipping 12 matching lines...) Expand all Loading... |
338 * of the rule is something sensible to put in a message. | 382 * of the rule is something sensible to put in a message. |
339 */ | 383 */ |
340 abstract class _Expectable { | 384 abstract class _Expectable { |
341 String description(); | 385 String description(); |
342 } | 386 } |
343 | 387 |
344 class _Rule { | 388 class _Rule { |
345 const _Rule(); | 389 const _Rule(); |
346 // Returns null for a match failure or [pos, ast] for success. | 390 // Returns null for a match failure or [pos, ast] for success. |
347 match(_ParserState state, int pos) { | 391 match(_ParserState state, int pos) { |
348 if (! state.inWhitespaceMode) { | 392 if (!state.inWhitespaceMode) { |
349 pos = _skip_whitespace(state, pos); | 393 pos = _skip_whitespace(state, pos); |
350 } | 394 } |
351 return matchAfterWS(state, pos); | 395 return matchAfterWS(state, pos); |
352 } | 396 } |
353 | 397 |
354 // Faster entry point for matching a sub-rule that is matched to the start | 398 // Faster entry point for matching a sub-rule that is matched to the start |
355 // position of the super-rule. Whitespace has already been skipped so no need | 399 // position of the super-rule. Whitespace has already been skipped so no need |
356 // to try to skip it again. | 400 // to try to skip it again. |
357 matchAfterWS(_ParserState state, int pos) { | 401 matchAfterWS(_ParserState state, int pos) { |
358 if (state.inhibitExpectedTrackingDepth == 0) { | 402 if (state.inhibitExpectedTrackingDepth == 0) { |
(...skipping 21 matching lines...) Expand all Loading... |
380 | 424 |
381 // Does the rule generate a value (AST) with the match? | 425 // Does the rule generate a value (AST) with the match? |
382 bool get generatesValue => false; | 426 bool get generatesValue => false; |
383 | 427 |
384 get defaultValue => null; | 428 get defaultValue => null; |
385 } | 429 } |
386 | 430 |
387 int _skip_whitespace(state, pos) { | 431 int _skip_whitespace(state, pos) { |
388 // Returns the next non-whitespace position. | 432 // Returns the next non-whitespace position. |
389 // This is done by matching the optional whitespaceRule with the current text. | 433 // This is done by matching the optional whitespaceRule with the current text. |
390 if (state.whitespaceRule == null) | 434 if (state.whitespaceRule == null) return pos; |
391 return pos; | |
392 state.inWhitespaceMode = true; | 435 state.inWhitespaceMode = true; |
393 state.inhibitExpectedTrackingDepth++; | 436 state.inhibitExpectedTrackingDepth++; |
394 while (true) { | 437 while (true) { |
395 var match = state.whitespaceRule.match(state, pos); | 438 var match = state.whitespaceRule.match(state, pos); |
396 if (match == null) | 439 if (match == null) break; |
397 break; | |
398 pos = match[0]; | 440 pos = match[0]; |
399 } | 441 } |
400 state.inWhitespaceMode = false; | 442 state.inWhitespaceMode = false; |
401 state.inhibitExpectedTrackingDepth--; | 443 state.inhibitExpectedTrackingDepth--; |
402 return pos; | 444 return pos; |
403 } | 445 } |
404 | 446 |
405 | |
406 _Rule _compileOptional(rule) { | 447 _Rule _compileOptional(rule) { |
407 return rule == null ? null : _compile(rule); | 448 return rule == null ? null : _compile(rule); |
408 } | 449 } |
409 | 450 |
410 _Rule _compile(rule) { | 451 _Rule _compile(rule) { |
411 if (rule is _Rule) | 452 if (rule is _Rule) return rule; |
412 return rule; | 453 if (rule is String) return new _StringRule(rule); |
413 if (rule is String) | 454 if (rule is Symbol) return new _SymbolRule(rule); |
414 return new _StringRule(rule); | 455 if (rule is RegExp) return new _RegExpRule(rule); |
415 if (rule is Symbol) | |
416 return new _SymbolRule(rule); | |
417 if (rule is RegExp) | |
418 return new _RegExpRule(rule); | |
419 if (rule is List) { | 456 if (rule is List) { |
420 return _compileMultiRule( | 457 return _compileMultiRule( |
421 rule, true, | 458 rule, |
| 459 true, |
422 (compiledRules, valueCount, reducer) => | 460 (compiledRules, valueCount, reducer) => |
423 new _SequenceRule(compiledRules, valueCount, reducer)); | 461 new _SequenceRule(compiledRules, valueCount, reducer)); |
424 } | 462 } |
425 throw new Exception('Cannot compile rule: $rule'); | 463 throw new Exception('Cannot compile rule: $rule'); |
426 } | 464 } |
427 | 465 |
428 class _EndOfInputRule extends _Rule { | 466 class _EndOfInputRule extends _Rule { |
429 _match(_ParserState state, int pos) { | 467 _match(_ParserState state, int pos) { |
430 if (pos == state._end) | 468 if (pos == state._end) return [pos, null]; |
431 return [pos, null]; | |
432 return null; | 469 return null; |
433 } | 470 } |
434 | 471 |
435 toString() => 'END'; | 472 toString() => 'END'; |
436 } | 473 } |
437 | 474 |
438 class _ErrorRule extends _Rule { | 475 class _ErrorRule extends _Rule { |
439 String message; | 476 String message; |
440 _ErrorRule(String this.message); | 477 _ErrorRule(String this.message); |
441 _match(_ParserState state, int pos) { | 478 _match(_ParserState state, int pos) { |
442 throw new ParseError(message); | 479 throw new ParseError(message); |
443 } | 480 } |
444 | 481 |
445 toString() => 'ERROR($message)'; | 482 toString() => 'ERROR($message)'; |
446 } | 483 } |
447 | 484 |
448 class _CharCodeRule extends _Rule { | 485 class _CharCodeRule extends _Rule { |
449 Function _predicate; | 486 Function _predicate; |
450 var _name; | 487 var _name; |
451 _CharCodeRule(this._predicate, this._name); | 488 _CharCodeRule(this._predicate, this._name); |
452 _match(_ParserState state, int pos) { | 489 _match(_ParserState state, int pos) { |
453 if (pos == state._end) | 490 if (pos == state._end) return null; |
454 return null; | |
455 int code = state._text.codeUnitAt(pos); | 491 int code = state._text.codeUnitAt(pos); |
456 if (_predicate(code)) | 492 if (_predicate(code)) return [pos + 1, null]; |
457 return [pos + 1, null]; | |
458 return null; | 493 return null; |
459 } | 494 } |
460 | 495 |
461 toString() => _name == null ? 'CHARCODE($_predicate)' : 'CHARCODE($_name)'; | 496 toString() => _name == null ? 'CHARCODE($_predicate)' : 'CHARCODE($_name)'; |
462 } | 497 } |
463 | 498 |
464 class _AnyCharRule extends _Rule { | 499 class _AnyCharRule extends _Rule { |
465 const _AnyCharRule(); | 500 const _AnyCharRule(); |
466 _match(_ParserState state, int pos) { | 501 _match(_ParserState state, int pos) { |
467 if (pos == state._end) | 502 if (pos == state._end) return null; |
468 return null; | |
469 return [pos + 1, null]; | 503 return [pos + 1, null]; |
470 } | 504 } |
471 | 505 |
472 toString() => 'CHAR()'; | 506 toString() => 'CHAR()'; |
473 } | 507 } |
474 | 508 |
475 class _SymbolRule extends _Rule { | 509 class _SymbolRule extends _Rule { |
476 final Symbol _symbol; | 510 final Symbol _symbol; |
477 _SymbolRule(Symbol this._symbol); | 511 _SymbolRule(Symbol this._symbol); |
478 _match(_ParserState state, int pos) { | 512 _match(_ParserState state, int pos) { |
479 if (_symbol._rule == null) | 513 if (_symbol._rule == null) |
480 throw new Exception("Symbol '${_symbol.name}' is undefined"); | 514 throw new Exception("Symbol '${_symbol.name}' is undefined"); |
481 return _symbol._rule.match(state, pos); | 515 return _symbol._rule.match(state, pos); |
482 } | 516 } |
483 | 517 |
484 bool get generatesValue => true; | 518 bool get generatesValue => true; |
485 | 519 |
486 toString() => '<${_symbol.name}>'; | 520 toString() => '<${_symbol.name}>'; |
487 } | 521 } |
488 | 522 |
489 class _SkipRule extends _Rule { | 523 class _SkipRule extends _Rule { |
490 // A rule that has no value. | 524 // A rule that has no value. |
491 _Rule _rule; | 525 _Rule _rule; |
492 _SkipRule(_Rule this._rule); | 526 _SkipRule(_Rule this._rule); |
493 _match(_ParserState state, int pos) { | 527 _match(_ParserState state, int pos) { |
494 var match = _rule.matchAfterWS(state, pos); | 528 var match = _rule.matchAfterWS(state, pos); |
495 if (match == null) | 529 if (match == null) return null; |
496 return null; | |
497 return [match[0], null]; | 530 return [match[0], null]; |
498 } | 531 } |
499 | 532 |
500 toString() => 'TOKEN($_rule)'; | 533 toString() => 'TOKEN($_rule)'; |
501 } | 534 } |
502 | 535 |
503 class _StringRule extends _Rule implements _Expectable { | 536 class _StringRule extends _Rule implements _Expectable { |
504 final String _string; | 537 final String _string; |
505 int _len; | 538 int _len; |
506 _StringRule(this._string) { | 539 _StringRule(this._string) { |
507 _len = _string.length; | 540 _len = _string.length; |
508 } | 541 } |
509 | 542 |
510 _match(_ParserState state, int pos) { | 543 _match(_ParserState state, int pos) { |
511 if (pos + _len > state._end) | 544 if (pos + _len > state._end) return null; |
512 return null; | |
513 for (int i = 0; i < _len; i++) { | 545 for (int i = 0; i < _len; i++) { |
514 if (state._text.codeUnitAt(pos + i) != _string.codeUnitAt(i)) | 546 if (state._text.codeUnitAt(pos + i) != _string.codeUnitAt(i)) return null; |
515 return null; | |
516 } | 547 } |
517 return [pos + _len, null]; | 548 return [pos + _len, null]; |
518 } | 549 } |
519 | 550 |
520 //get defaultValue => _string; | 551 //get defaultValue => _string; |
521 | 552 |
522 toString() => '"$_string"'; | 553 toString() => '"$_string"'; |
523 | 554 |
524 description() => "'$_string'"; | 555 description() => "'$_string'"; |
525 } | 556 } |
(...skipping 23 matching lines...) Expand all Loading... |
549 return match; | 580 return match; |
550 } | 581 } |
551 | 582 |
552 toString() => _name; | 583 toString() => _name; |
553 | 584 |
554 description() => _name == null ? '?' : _name; | 585 description() => _name == null ? '?' : _name; |
555 } | 586 } |
556 | 587 |
557 class _TextValueRule extends _Rule { | 588 class _TextValueRule extends _Rule { |
558 final _Rule _rule; | 589 final _Rule _rule; |
559 final _extract; // Function | 590 final _extract; // Function |
560 | 591 |
561 _TextValueRule(_Rule this._rule, Function this._extract); | 592 _TextValueRule(_Rule this._rule, Function this._extract); |
562 | 593 |
563 _match(_ParserState state, int pos) { | 594 _match(_ParserState state, int pos) { |
564 var match = _rule.matchAfterWS(state, pos); | 595 var match = _rule.matchAfterWS(state, pos); |
565 if (match == null) { | 596 if (match == null) { |
566 return null; | 597 return null; |
567 } | 598 } |
568 var endPos = match[0]; | 599 var endPos = match[0]; |
569 return [endPos, _extract(state._text, pos, endPos)]; | 600 return [endPos, _extract(state._text, pos, endPos)]; |
570 } | 601 } |
571 | 602 |
572 bool get generatesValue => true; | 603 bool get generatesValue => true; |
573 | 604 |
574 toString() => 'TEXT($_rule)'; | 605 toString() => 'TEXT($_rule)'; |
575 } | 606 } |
576 | 607 |
577 _Rule _compileMultiRule(List rules, | 608 _Rule _compileMultiRule( |
578 bool allowReducer, | 609 List rules, bool allowReducer, finish(compiledRules, valueCount, reducer)) { |
579 finish(compiledRules, valueCount, reducer)) { | |
580 int valueCount = 0; | 610 int valueCount = 0; |
581 List compiledRules = new List<_Rule>(); | 611 List compiledRules = new List<_Rule>(); |
582 Function reducer; | 612 Function reducer; |
583 for (var rule in rules) { | 613 for (var rule in rules) { |
584 if (reducer != null) | 614 if (reducer != null) |
585 throw new Exception('Reducer must be last in sequence: $rule'); | 615 throw new Exception('Reducer must be last in sequence: $rule'); |
586 if (rule is Function) { | 616 if (rule is Function) { |
587 if (allowReducer) | 617 if (allowReducer) |
588 reducer = rule; | 618 reducer = rule; |
589 else | 619 else |
590 throw new Exception('Bad rule: "$rule"'); | 620 throw new Exception('Bad rule: "$rule"'); |
591 } else { | 621 } else { |
592 _Rule compiledRule = _compile(rule); | 622 _Rule compiledRule = _compile(rule); |
593 if (compiledRule.generatesValue) | 623 if (compiledRule.generatesValue) ++valueCount; |
594 ++valueCount; | |
595 compiledRules.add(compiledRule); | 624 compiledRules.add(compiledRule); |
596 } | 625 } |
597 } | 626 } |
598 return finish(compiledRules, valueCount, reducer); | 627 return finish(compiledRules, valueCount, reducer); |
599 } | 628 } |
600 | 629 |
601 String _formatMultiRule(String functor, List rules) { | 630 String _formatMultiRule(String functor, List rules) { |
602 var sb = new StringBuffer(functor); | 631 var sb = new StringBuffer(functor); |
603 sb.write('('); | 632 sb.write('('); |
604 var separator = ''; | 633 var separator = ''; |
605 for (var rule in rules) { | 634 for (var rule in rules) { |
606 sb.write(separator); | 635 sb.write(separator); |
607 sb.write(rule); | 636 sb.write(rule); |
608 separator = ','; | 637 separator = ','; |
609 } | 638 } |
610 sb.write(')'); | 639 sb.write(')'); |
611 return sb.toString(); | 640 return sb.toString(); |
612 } | 641 } |
613 | 642 |
614 class _SequenceRule extends _Rule { | 643 class _SequenceRule extends _Rule { |
615 // This rule matches the component rules in order. | 644 // This rule matches the component rules in order. |
616 final List<_Rule> _rules; | 645 final List<_Rule> _rules; |
617 final int _generatingSubRules; | 646 final int _generatingSubRules; |
618 final Function _reducer; | 647 final Function _reducer; |
619 bool _generatesValue; | 648 bool _generatesValue; |
620 _SequenceRule(List<_Rule> this._rules, | 649 _SequenceRule(List<_Rule> this._rules, int this._generatingSubRules, |
621 int this._generatingSubRules, | 650 Function this._reducer) { |
622 Function this._reducer) { | |
623 _generatesValue = _generatingSubRules > 0 || _reducer != null; | 651 _generatesValue = _generatingSubRules > 0 || _reducer != null; |
624 } | 652 } |
625 | 653 |
626 _match(state, pos) { | 654 _match(state, pos) { |
627 var sequence = []; | 655 var sequence = []; |
628 for (var rule in _rules) { | 656 for (var rule in _rules) { |
629 var match = rule.match(state, pos); | 657 var match = rule.match(state, pos); |
630 if (match == null) | 658 if (match == null) return null; |
631 return null; | |
632 if (rule.generatesValue) { | 659 if (rule.generatesValue) { |
633 var ast = match[1]; | 660 var ast = match[1]; |
634 sequence.add(ast); | 661 sequence.add(ast); |
635 } | 662 } |
636 pos = match[0]; | 663 pos = match[0]; |
637 } | 664 } |
638 if (_reducer == null) { | 665 if (_reducer == null) { |
639 if (_generatingSubRules == 0) | 666 if (_generatingSubRules == 0) return [pos, null]; |
640 return [pos, null]; | 667 if (_generatingSubRules == 1) return [pos, sequence[0]]; |
641 if (_generatingSubRules == 1) | |
642 return [pos, sequence[0]]; | |
643 return [pos, sequence]; | 668 return [pos, sequence]; |
644 } else { | 669 } else { |
645 return [pos, _apply(_reducer, sequence)]; | 670 return [pos, _apply(_reducer, sequence)]; |
646 } | 671 } |
647 } | 672 } |
648 | 673 |
649 bool get generatesValue => _generatesValue; | 674 bool get generatesValue => _generatesValue; |
650 | 675 |
651 toString() => _formatMultiRule('SEQ', _rules); | 676 toString() => _formatMultiRule('SEQ', _rules); |
652 } | 677 } |
(...skipping 23 matching lines...) Expand all Loading... |
676 bool get generatesValue => true; | 701 bool get generatesValue => true; |
677 | 702 |
678 toString() => _formatMultiRule('OR', _rules); | 703 toString() => _formatMultiRule('OR', _rules); |
679 } | 704 } |
680 | 705 |
681 class _OptionalRule extends _Rule { | 706 class _OptionalRule extends _Rule { |
682 _Rule _rule; | 707 _Rule _rule; |
683 _OptionalRule(_Rule this._rule); | 708 _OptionalRule(_Rule this._rule); |
684 _match(_ParserState state, int pos) { | 709 _match(_ParserState state, int pos) { |
685 var match = _rule.match(state, pos); | 710 var match = _rule.match(state, pos); |
686 if (_rule.generatesValue) | 711 if (_rule.generatesValue) return match == null ? [pos, null] : match; |
687 return match == null | 712 return match == null ? [pos, false] : [match[0], true]; |
688 ? [pos, null] | |
689 : match; | |
690 return match == null | |
691 ? [pos, false] | |
692 : [match[0], true]; | |
693 } | 713 } |
694 | 714 |
695 bool get generatesValue => true; | 715 bool get generatesValue => true; |
696 | 716 |
697 toString() => 'MAYBE($_rule)'; | 717 toString() => 'MAYBE($_rule)'; |
698 } | 718 } |
699 | 719 |
700 class _ContextRule extends _Rule { | 720 class _ContextRule extends _Rule { |
701 _Rule _rule; | 721 _Rule _rule; |
702 _ContextRule(_Rule this._rule); | 722 _ContextRule(_Rule this._rule); |
703 _match(_ParserState state, int pos) { | 723 _match(_ParserState state, int pos) { |
704 // TODO: protect error state. | 724 // TODO: protect error state. |
705 var match = _rule._match(state, pos); | 725 var match = _rule._match(state, pos); |
706 if (match == null) | 726 if (match == null) return null; |
707 return null; | |
708 return [pos, null]; | 727 return [pos, null]; |
709 } | 728 } |
710 | 729 |
711 toString() => 'AT($_rule)'; | 730 toString() => 'AT($_rule)'; |
712 } | 731 } |
713 | 732 |
714 class _NegativeContextRule extends _Rule { | 733 class _NegativeContextRule extends _Rule { |
715 _Rule _rule; | 734 _Rule _rule; |
716 _NegativeContextRule(_Rule this._rule); | 735 _NegativeContextRule(_Rule this._rule); |
717 _match(_ParserState state, int pos) { | 736 _match(_ParserState state, int pos) { |
718 // TODO: protect error state. | 737 // TODO: protect error state. |
719 var match = _rule._match(state, pos); | 738 var match = _rule._match(state, pos); |
720 if (match == null) | 739 if (match == null) return [pos, null]; |
721 return [pos, null]; | |
722 return null; | 740 return null; |
723 } | 741 } |
724 | 742 |
725 toString() => 'NOT($_rule)'; | 743 toString() => 'NOT($_rule)'; |
726 } | 744 } |
727 | 745 |
728 class _RepeatRule extends _Rule { | 746 class _RepeatRule extends _Rule { |
729 // Matches zero, one or more items. | 747 // Matches zero, one or more items. |
730 _Rule _rule; | 748 _Rule _rule; |
731 _Rule _separator; | 749 _Rule _separator; |
732 int _min; | 750 int _min; |
733 | 751 |
734 _RepeatRule(this._rule, this._separator, this._min); | 752 _RepeatRule(this._rule, this._separator, this._min); |
735 | 753 |
736 _match(state, pos) { | 754 _match(state, pos) { |
737 // First match. | 755 // First match. |
738 var match = _rule.match(state, pos); | 756 var match = _rule.match(state, pos); |
739 if (match == null) | 757 if (match == null) if (_min == 0) |
740 if (_min == 0) | 758 return [pos, []]; |
741 return [pos, []]; | 759 else |
742 else | 760 return null; |
743 return null; | |
744 pos = match[0]; | 761 pos = match[0]; |
745 var result = [match[1]]; | 762 var result = [match[1]]; |
746 | 763 |
747 // Subsequent matches: | 764 // Subsequent matches: |
748 while (true) { | 765 while (true) { |
749 var newPos = pos; | 766 var newPos = pos; |
750 if (_separator != null) { | 767 if (_separator != null) { |
751 match = _separator.match(state, pos); | 768 match = _separator.match(state, pos); |
752 if (match == null) | 769 if (match == null) return [pos, result]; |
753 return [pos, result]; | |
754 newPos = match[0]; | 770 newPos = match[0]; |
755 } | 771 } |
756 match = _rule.match(state, newPos); | 772 match = _rule.match(state, newPos); |
757 if (match == null) | 773 if (match == null) return [pos, result]; |
758 return [pos, result]; | |
759 pos = match[0]; | 774 pos = match[0]; |
760 result.add(match[1]); | 775 result.add(match[1]); |
761 } | 776 } |
762 } | 777 } |
763 | 778 |
764 bool get generatesValue => true; | 779 bool get generatesValue => true; |
765 | 780 |
766 toString() => 'MANY(min:$_min, $_rule${_separator==null?'':", sep: $_separator
"})'; | 781 toString() => |
| 782 'MANY(min:$_min, $_rule${_separator==null?'':", sep: $_separator"})'; |
767 } | 783 } |
768 | 784 |
769 class _MemoRule extends _Rule { | 785 class _MemoRule extends _Rule { |
770 final _Rule _rule; | 786 final _Rule _rule; |
771 | 787 |
772 var parseInstance; | 788 var parseInstance; |
773 | 789 |
774 // A map from position to result. Can this be replaced with something | 790 // A map from position to result. Can this be replaced with something |
775 // smaller? | 791 // smaller? |
776 // TODO: figure out how to discard the map and parseInstance after parsing. | 792 // TODO: figure out how to discard the map and parseInstance after parsing. |
777 Map<int,Object> map; | 793 Map<int, Object> map; |
778 | 794 |
779 _MemoRule(this._rule); | 795 _MemoRule(this._rule); |
780 | 796 |
781 _match(state, pos) { | 797 _match(state, pos) { |
782 // See if we are still parsing the same input. Relies on the fact that the | 798 // See if we are still parsing the same input. Relies on the fact that the |
783 // input is a string and strings are immutable. | 799 // input is a string and strings are immutable. |
784 if (!identical(parseInstance, state._text)) { | 800 if (!identical(parseInstance, state._text)) { |
785 map = new Map<int,Object>(); | 801 map = new Map<int, Object>(); |
786 parseInstance = state._text; | 802 parseInstance = state._text; |
787 } | 803 } |
788 // TODO: does this have to check or preserve parse state (like | 804 // TODO: does this have to check or preserve parse state (like |
789 // inWhitespaceMode, error position info etc?) | 805 // inWhitespaceMode, error position info etc?) |
790 // Stored result can be null (memoized failure). | 806 // Stored result can be null (memoized failure). |
791 if (map.containsKey(pos)) { | 807 if (map.containsKey(pos)) { |
792 return map[pos]; | 808 return map[pos]; |
793 } | 809 } |
794 var match = _rule.match(state, pos); | 810 var match = _rule.match(state, pos); |
795 map[pos] = match; | 811 map[pos] = match; |
796 return match; | 812 return match; |
797 } | 813 } |
798 | 814 |
799 bool get generatesValue => _rule.generatesValue; | 815 bool get generatesValue => _rule.generatesValue; |
800 | 816 |
801 toString() => 'MEMO($_rule)'; | 817 toString() => 'MEMO($_rule)'; |
802 } | 818 } |
803 | 819 |
804 _apply(fn, List args) { | 820 _apply(fn, List args) { |
805 switch (args.length) { | 821 switch (args.length) { |
806 case 0: return fn(); | 822 case 0: |
807 case 1: return fn(args[0]); | 823 return fn(); |
808 case 2: return fn(args[0], args[1]); | 824 case 1: |
809 case 3: return fn(args[0], args[1], args[2]); | 825 return fn(args[0]); |
810 case 4: return fn(args[0], args[1], args[2], args[3]); | 826 case 2: |
811 case 5: return fn(args[0], args[1], args[2], args[3], args[4]); | 827 return fn(args[0], args[1]); |
812 case 6: return fn(args[0], args[1], args[2], args[3], args[4], | 828 case 3: |
813 args[5]); | 829 return fn(args[0], args[1], args[2]); |
814 case 7: return fn(args[0], args[1], args[2], args[3], args[4], | 830 case 4: |
815 args[5], args[6]); | 831 return fn(args[0], args[1], args[2], args[3]); |
816 case 8: return fn(args[0], args[1], args[2], args[3], args[4], | 832 case 5: |
817 args[5], args[6], args[7]); | 833 return fn(args[0], args[1], args[2], args[3], args[4]); |
818 case 9: return fn(args[0], args[1], args[2], args[3], args[4], | 834 case 6: |
819 args[5], args[6], args[7], args[8]); | 835 return fn(args[0], args[1], args[2], args[3], args[4], args[5]); |
820 case 10: return fn(args[0], args[1], args[2], args[3], args[4], | 836 case 7: |
821 args[5], args[6], args[7], args[8], args[9]); | 837 return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6]); |
| 838 case 8: |
| 839 return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6], |
| 840 args[7]); |
| 841 case 9: |
| 842 return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6], |
| 843 args[7], args[8]); |
| 844 case 10: |
| 845 return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6], |
| 846 args[7], args[8], args[9]); |
822 | 847 |
823 default: | 848 default: |
824 throw new Exception('Too many arguments in _apply: $args'); | 849 throw new Exception('Too many arguments in _apply: $args'); |
825 } | 850 } |
826 } | 851 } |
827 | 852 |
828 List _unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z) { | 853 List _unspread(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, |
| 854 w, x, y, z) { |
829 List list = new List(); | 855 List list = new List(); |
830 add(element) { if (element != null) list.add(element); } | 856 add(element) { |
| 857 if (element != null) list.add(element); |
| 858 } |
| 859 |
831 add(a); | 860 add(a); |
832 add(b); | 861 add(b); |
833 add(c); | 862 add(c); |
834 add(d); | 863 add(d); |
835 add(e); | 864 add(e); |
836 add(f); | 865 add(f); |
837 add(g); | 866 add(g); |
838 add(h); | 867 add(h); |
839 add(i); | 868 add(i); |
840 add(j); | 869 add(j); |
841 add(k); | 870 add(k); |
842 add(l); | 871 add(l); |
843 add(m); | 872 add(m); |
844 add(n); | 873 add(n); |
845 add(o); | 874 add(o); |
846 add(p); | 875 add(p); |
847 add(q); | 876 add(q); |
848 add(r); | 877 add(r); |
849 add(s); | 878 add(s); |
850 add(t); | 879 add(t); |
851 add(u); | 880 add(u); |
852 add(v); | 881 add(v); |
853 add(w); | 882 add(w); |
854 add(x); | 883 add(x); |
855 add(y); | 884 add(y); |
856 add(z); | 885 add(z); |
857 return list; | 886 return list; |
858 } | 887 } |
OLD | NEW |