OLD | NEW |
---|---|
(Empty) | |
1 #library('Peg Parser'); | |
Jennifer Messerly
2011/10/28 01:21:10
Do they intend to allow spaces in the name? (I'm n
sra1
2011/10/28 05:06:25
My working hypothesis is if it was meant to be an
| |
2 | |
3 /* | |
4 * The following functions are combinators for building Rules. | |
Jennifer Messerly
2011/10/28 01:21:10
awesome comment!
| |
5 * | |
6 * A rule is one of the following | |
7 * - A String which matches the string literally. | |
8 * - A Symbol which matches the symbol's definition. | |
9 * - A list of rules with an optional reducing function, which matches a sequenc e. | |
10 * - The result of calling one of the combinators. | |
11 * | |
12 * Some rules are 'value-generating' rules, they return an 'abstract syntax | |
13 * tree' with the match. If a rule is not value-generating [:null:] is the | |
14 * value. | |
15 * | |
16 * A Symbol is always a value-generating rule. If the value is not required, use | |
17 * [:SKIP(aSymbol):] in place of [:aSymbol:]. | |
18 * | |
19 * A String is not a value-generating rule but can be converted into one by | |
20 * using [:TEXT('string'):] in place of [:'string':]. | |
21 * | |
22 * A list or sequence is value-generating depending on the subrules. The | |
23 * sequence is value-generating if any of the subrules are value-generating or | |
24 * if there is a reducing function. If no reducing function is given, the value | |
25 * returned depends on the number of value-generating subrules. If there is | |
26 * only one value generating subrule, that provideds the value for the sequence. | |
27 * If there are more, then the value is a list of the values of the | |
28 * value-generating subrules. | |
29 */ | |
30 | |
31 /** | |
32 * Matches one character by a predicate on the character code. | |
33 * If [spec] is an int, that character is matched. | |
34 * If [spec] is a function it is used | |
35 * | |
36 * Example [: CHARCODE((code) => 48 <= code && code <= 57) :] recognizes an | |
37 * ASCII digit. | |
38 * | |
39 * CHARCODE does not generate a value. | |
40 */ | |
41 _Rule CHARCODE(spec, [name]) { | |
42 if (spec is int) | |
Jennifer Messerly
2011/10/28 01:21:10
I hesitate to suggest style for someone else's cod
sra1
2011/10/28 05:06:25
I'm experimenting. cpp style does not actually say
| |
43 return new _CharCodeRule((code) => code == spec, name); | |
44 else | |
45 return new _CharCodeRule(spec, name); | |
46 } | |
47 | |
48 /** | |
49 * Matches one of the [characters]. | |
50 * | |
51 * CHAR does not generate a value. | |
52 */ | |
53 _Rule CHAR([characters]) { | |
54 if (characters == null) | |
55 return new _AnyCharRule(); | |
Jennifer Messerly
2011/10/28 01:21:10
const?
sra1
2011/10/28 05:06:25
Done.
| |
56 if (characters is int) | |
57 return CHARCODE(characters); | |
58 | |
59 // Find the range of character codes and construct an array of flags for codes | |
60 // within the range. | |
61 List<int> codes = characters.charCodes(); | |
62 codes.sort((a, b) => a < b ? -1 : a > b ? 1 : 0); | |
Jennifer Messerly
2011/10/28 01:21:10
codes.sort((a, b) => a.compareTo(b));
...since num
sra1
2011/10/28 05:06:25
I could, but the Frog code is kind of disgusting.
| |
63 int lo = codes[0]; | |
64 int hi = codes[codes.length - 1]; | |
65 if (lo == hi) | |
66 return CHARCODE(lo); | |
67 int len = hi - lo + 1; | |
68 List<bool> flags = new List<bool>(len); | |
Jennifer Messerly
2011/10/28 01:21:10
style nit: repeated type name
sra1
2011/10/28 05:06:25
Done.
| |
69 for (int i = 0; i < len; ++i) | |
70 flags[i] = false; | |
Jennifer Messerly
2011/10/28 01:21:10
I wonder if we should have a BitList class somewhe
sra1
2011/10/28 05:06:25
The bitlist would probably be slower in access too
| |
71 for (int code in codes) | |
72 flags[code - lo] = true; | |
73 | |
74 return CHARCODE((code) => code >= lo && code <= hi && flags[code - lo]); | |
75 } | |
76 | |
77 /** | |
78 * Matches the end of the input. | |
79 * | |
80 * END does not generate a value. | |
81 */ | |
82 _Rule get END() => new _EndOfInputRule(); | |
83 | |
84 /** | |
85 * Throws an exception. | |
86 */ | |
87 _Rule ERROR(String message) => new _ErrorRule(message); | |
88 | |
89 /** | |
90 * Matches [rule] but does not consume the input. Useful for matching a right | |
91 * context. | |
92 * | |
93 * AT does not generate a value. | |
94 */ | |
95 _Rule AT(rule) => new _ContextRule(_compile(rule)); | |
96 | |
97 /** | |
98 * Matches when [rule] does not match. No input is consumed. | |
99 * | |
100 * NOT does not generate a value. | |
101 */ | |
102 _Rule NOT(rule) => new _NegativeContextRule(_compile(rule)); | |
103 | |
104 /** | |
105 * Matches [rule] but generates no value even if [rule] generates a value. | |
106 * | |
107 * SKIP never generates a value. | |
108 */ | |
109 _Rule SKIP(rule) => new _SkipRule(_compile(rule)); | |
110 | |
111 /** | |
112 * Matches [rule] in a lexical context where whitespace is not automatically | |
113 * skipped. Useful for matching what would normally be considered to be tokens. | |
114 * [name] is a user-friendly description of what is being matched and is used in | |
115 * error messages. | |
116 * | |
117 * LEX(rule) | |
118 * LEX(name, rule) | |
119 * | |
120 * LEX does not generate a value. If a value is required, wrap LEX with TEXT. | |
121 */ | |
122 _Rule LEX(arg1, [arg2]) { | |
123 if (arg2 == null) | |
124 return new _LexicalRule(arg1 is String ? arg1 : null, _compile(arg1)); | |
125 else | |
126 return new _LexicalRule(arg1, _compile(arg2)); | |
127 } | |
128 | |
129 /** | |
130 * Matches [rule] and generates a value from the matched text. If the [rule] | |
131 * matches, then TEXT(rule) matches and has a value derived from the string | |
132 * fragment that was matched. The default derived value is the string fragment. | |
133 * | |
134 * TEXT always generates a value. | |
135 */ | |
136 _Rule TEXT(rule, [extractor]) => | |
137 new _TextValueRule(_compile(rule), | |
138 extractor == null | |
139 ? (string, start, end) => string.substring(start, end) | |
140 : extractor); | |
141 | |
142 /** | |
143 * Matches an optional rule. | |
144 * | |
145 * MAYBE is a value generating matcher. | |
146 * | |
147 * If [rule] is value generating then the value is the value generated by [rule] | |
148 * if it matches, and [:null:] if it does not. | |
149 * | |
150 * If [rule] is not value generatinge then the value is [:true:] if [rule] | |
151 * matches and [:false:] if it does not. | |
152 */ | |
153 _Rule MAYBE(rule) => new _OptionalRule(_compile(rule)); | |
154 | |
155 /** | |
156 * MANY(rule) matches [rule] [min] or more times. | |
157 * [min] must be 0 or 1. | |
158 * If [separator] is provided it is used to match a separator between matches of | |
159 * [rule]. | |
160 * | |
161 * MANY is a value generating matcher. The value is a list of the matches of | |
162 * [rule]. The list may be empty if [:min == 0:]. | |
163 */ | |
164 _Rule MANY(rule, [separator = null, int min = 1]) { | |
165 assert(0 <= min && min <= 1); | |
166 return new _RepeatRule(_compile(rule), _compile_optional(separator), min); | |
167 } | |
168 | |
169 /** | |
170 * Matches [rule] zero or more times. Shorthand for [:MANY(rule, min:0):] | |
171 * TODO: retire min: parameter? | |
172 * | |
173 * MANY0 is a value generating matcher. | |
174 */ | |
175 _Rule MANY0(rule, [separator = null]) { | |
176 return new _RepeatRule(_compile(rule), _compile_optional(separator), 0); | |
177 } | |
178 | |
179 /** | |
180 * Matches [rules] in order until one succeeds. | |
181 * | |
182 * OR is value-generating. | |
183 */ | |
184 _Rule OR([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]) => | |
Jennifer Messerly
2011/10/28 01:21:10
something tells me you wanted spread/rest args :)
sra1
2011/10/28 05:06:25
Yes, it looks bad because [ means both sequence an
| |
185 _compileMultiRule( | |
186 (a is List && b == null) // Backward compat. OR([a, b]) => OR(a, b). | |
187 ? a | |
188 : _unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z), | |
189 false, | |
190 (compiledRules, valueCount, reducer) => | |
191 new _ChoiceRule(compiledRules)); | |
192 | |
193 | |
194 | |
195 _Rule SEQ([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]) => | |
196 _compile(_unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z)); | |
197 | |
198 /** | |
199 * Matches [rule] | |
200 */ | |
201 _Rule MEMO(rule) => new _MemoRule(_compile(rule)); | |
202 | |
203 _Rule TAG(tag, rule) => _compile([rule, (ast) => [tag, ast]]); | |
204 | |
205 | |
206 class ParseError implements Exception { | |
207 const ParseError(String this._message); | |
208 String toString() => _message; | |
209 final String _message; | |
210 } | |
211 | |
212 /** | |
213 * A grammar is a collection of symbols and rules that may be used to parse an | |
214 * input. | |
215 */ | |
216 class Grammar { | |
Jennifer Messerly
2011/10/28 01:21:10
In theory, you could get by without this class? It
sra1
2011/10/28 05:06:25
In a different implementation the Grammar would be
| |
217 Map<String, Symbol> _symbols; | |
218 | |
219 /** This rule may be set by the user to define whitespace. */ | |
220 _Rule _whitespace; | |
221 | |
222 _Rule get whitespace() => _whitespace; | |
223 void set whitespace(rule) { _whitespace = _compile(rule); } | |
224 | |
225 Grammar() { | |
226 _symbols = new Map<String, Symbol>(); | |
227 //whitespace = OR([' ', '\t', '\r', '\n']); | |
Jennifer Messerly
2011/10/28 01:21:10
remove?
sra1
2011/10/28 05:06:25
Done.
| |
228 this.whitespace = CHAR(' \t\r\n'); | |
Jennifer Messerly
2011/10/28 01:21:10
shouldn't need "this." anymore now that the bug is
sra1
2011/10/28 05:06:25
Done.
| |
229 } | |
230 | |
231 /** | |
232 * operator [] is used to find or create symbols. Symbols may appear in rules | |
233 * to define recursive rules. | |
234 */ | |
235 Symbol operator [](String name) { | |
Jennifer Messerly
2011/10/28 01:21:10
Could define a setter too that would set the Symbo
sra1
2011/10/28 05:06:25
The whole thing with having to define symbols and
| |
236 if (_symbols.containsKey(name)) | |
237 return _symbols[name]; | |
238 Symbol s = new Symbol(name, this); | |
239 _symbols[name] = s; | |
240 return s; | |
241 } | |
242 | |
243 /** | |
244 * Parses the input string and returns the parsed AST, or throws an exception | |
245 * if the input can't be parsed. | |
246 */ | |
247 parse(root, String text) { | |
248 for (var symbol in _symbols.getValues()) | |
Jennifer Messerly
2011/10/28 01:21:10
style nit: curlies
| |
249 if (symbol._rule == null) | |
250 print('${symbol.name} is undefined'); | |
251 | |
252 var state = new _ParserState(text, whitespace: whitespace); | |
253 var match = _compile(root).match(state, 0); | |
254 if (match == null) | |
255 return diagnose(state); | |
256 var pos = match[0]; | |
257 pos = _skip_whitespace(state, pos); | |
258 if (pos == state._end) | |
259 return match[1]; | |
260 return diagnose(state); | |
261 } | |
262 | |
263 diagnose(state) { | |
264 // TODO: locate error and describe. | |
Jennifer Messerly
2011/10/28 01:21:10
ah, error messages, the hard part of Packrat parse
| |
265 print('Parse failed at position ${state.max_pos}'); | |
266 var message = 'unexpected error'; | |
267 if (state.max_rule.isEmpty()) { | |
268 var s = new Set(); | |
269 for (var rule in state.max_rule) | |
Jennifer Messerly
2011/10/28 01:21:10
if we got here, isn't state.max_rule empty? should
sra1
2011/10/28 05:06:25
Good catch. I was wondering where the informative
| |
270 s.add(rule.description()); | |
271 var tokens = new List<String>.from(s); | |
272 tokens.sort((a, b) => a.startsWith("'")==b.startsWith("'") ? a.compareTo(b ) | |
Jennifer Messerly
2011/10/28 01:21:10
style nit: long line, spaces etc
sra1
2011/10/28 05:06:25
Done.
| |
273 : a.startsWith("'") ? +1 : -1); | |
274 var expected = Strings.join(tokens, ' or '); | |
275 var found = state.max_pos == state._end ? 'end of file' | |
276 : "'${state._text[state.max_pos]}'"; | |
277 message = 'Expected $expected but found $found'; | |
278 } | |
279 int start = state.max_pos; | |
280 int end = start; | |
281 while (start >= 1 && state._text[start - 1] != '\n') --start; | |
282 while (end < state._text.length && state._text[end] != '\n') ++end; | |
283 var line = state._text.substring(start, end); | |
284 var indicator = ''; | |
285 for (var i = 0; i < line.length && start + i < state.max_pos; i++) | |
286 indicator = indicator + ' '; | |
287 indicator = indicator + '^'; | |
288 print(message); | |
289 print(line); | |
290 print(indicator); | |
291 return null; | |
292 } | |
293 } | |
294 | |
295 class Symbol { | |
296 final String name; | |
297 final Grammar grammar; | |
298 _Rule _rule; | |
299 | |
300 Symbol(this.name, this.grammar); | |
301 | |
302 void set def(rule) { | |
303 assert(_rule == null); // Assign once. | |
304 _rule = _compile(rule); | |
305 } | |
306 | |
307 toString() => _rule == null ? '<$name>' : '<$name = $_rule>'; | |
308 } | |
309 | |
310 | |
311 class _ParserState { | |
312 _ParserState(this._text, [_Rule whitespace = null]) { | |
313 _end = this._text.length; | |
314 whitespace_rule = whitespace; | |
315 max_rule = []; | |
316 } | |
317 | |
318 String _text; | |
319 int _end; | |
320 | |
321 // | |
322 bool in_whitespace_mode = false; | |
Jennifer Messerly
2011/10/28 01:21:10
style nit: camelCase
sra1
2011/10/28 05:06:25
Done.
| |
323 _Rule whitespace_rule = null; | |
324 | |
325 // Used for constructing an error message. | |
326 int inhibitExpectedTrackingDepth = 0; | |
327 int max_pos = 0; | |
328 var max_rule; | |
329 } | |
330 | |
331 /** | |
332 * An interface tag for rules. If this tag is on a rule, then the description() | |
333 * of the rule is something sensible to put in a message. | |
334 */ | |
335 interface _Expectable { | |
336 String description(); | |
337 } | |
338 | |
339 class _Rule { | |
340 // Returns null for a match failure or [pos, ast] for success. | |
341 match(_ParserState state, int pos) { | |
342 if (! state.in_whitespace_mode) { | |
343 pos = _skip_whitespace(state, pos); | |
344 } | |
345 return matchAfterWS(state, pos); | |
346 } | |
347 | |
348 // Faster entry point for matching a sub-rule that is matched to the start | |
349 // position of the super-rule. Whitespace has already been skipped so no need | |
350 // to try to skip it again. | |
351 matchAfterWS(_ParserState state, int pos) { | |
352 if (state.inhibitExpectedTrackingDepth == 0) { | |
353 // Track position for possible error messaging | |
354 if (pos > state.max_pos) { | |
355 // Store position and the rule. | |
356 state.max_pos = pos; | |
357 if (this is _Expectable) { | |
358 state.max_rule = [this]; | |
359 } else { | |
360 state.max_rule = []; | |
361 } | |
362 } else if (pos == state.max_pos) { | |
363 if (this is _Expectable) { | |
364 state.max_rule.add(this); | |
365 } | |
366 } | |
367 } | |
368 // Delegate the matching logic to the the specialized function. | |
369 return _match(state, pos); | |
370 } | |
371 | |
372 // Overridden in subclasses to match the rule. | |
373 _match(_ParserState state, int pos) => null; | |
374 | |
375 // Does the rule generate a value (AST) with the match? | |
376 bool get generatesValue() => false; | |
377 | |
378 get defaultValue() => null; | |
379 } | |
380 | |
381 int _skip_whitespace(state, pos) { | |
382 // Returns the next non-whitespace position. | |
383 // This is done by matching the optional whitespace_rule with the current text . | |
384 if (state.whitespace_rule == null) | |
385 return pos; | |
386 state.in_whitespace_mode = true; | |
387 state.inhibitExpectedTrackingDepth++; | |
388 while (true) { | |
389 var match = state.whitespace_rule.match(state, pos); | |
390 if (match == null) | |
391 break; | |
392 pos = match[0]; | |
393 } | |
394 state.in_whitespace_mode = false; | |
395 state.inhibitExpectedTrackingDepth--; | |
396 return pos; | |
397 } | |
398 | |
399 | |
400 _Rule _compile_optional(rule) { | |
Jennifer Messerly
2011/10/28 01:21:10
why not put this check in _compile?
style nit: nam
sra1
2011/10/28 05:06:25
It gives the user better errors if null throws at
| |
401 return rule == null ? null : _compile(rule); | |
402 } | |
403 | |
404 _Rule _compile(rule) { | |
Jennifer Messerly
2011/10/28 01:21:10
I found the name "compile" a bit confusing here. N
sra1
2011/10/28 05:06:25
Me neither.
| |
405 if (rule is _Rule) | |
406 return rule; | |
407 if (rule is String) | |
408 return new _StringRule(rule); | |
409 if (rule is Symbol) | |
410 return new _SymbolRule(rule); | |
411 if (rule is RegExp) | |
412 return new _RegExpRule(rule); | |
413 if (rule is List) { | |
414 return _compileMultiRule( | |
415 rule, true, | |
416 (compiledRules, valueCount, reducer) => | |
417 new _SequenceRule(compiledRules, valueCount, reducer)); | |
418 } | |
419 throw new Exception('Cannot compile rule: $rule'); | |
420 } | |
421 | |
422 class _EndOfInputRule extends _Rule { | |
423 _match(_ParserState state, int pos) { | |
424 if (pos == state._end) | |
425 return [pos, null]; | |
426 return null; | |
427 } | |
428 | |
429 toString() => 'END'; | |
430 } | |
431 | |
432 class _ErrorRule extends _Rule { | |
433 String message; | |
434 _ErrorRule(String this.message); | |
435 _match(_ParserState state, int pos) { | |
436 throw new ParseError(message); | |
Jennifer Messerly
2011/10/28 01:21:10
include position?
sra1
2011/10/28 05:06:25
The position needs to be converted into something
| |
437 } | |
438 | |
439 toString() => 'ERROR($message)'; | |
440 } | |
441 | |
442 class _CharCodeRule extends _Rule { | |
443 Function _predicate; | |
444 var _name; | |
445 _CharCodeRule(Function this._predicate, this._name); | |
Jennifer Messerly
2011/10/28 01:21:10
don't need to repeat the type "Function" here.
| |
446 _match(_ParserState state, int pos) { | |
447 if (pos == state._end) | |
Jennifer Messerly
2011/10/28 01:21:10
I wonder if there's a way to do this without check
sra1
2011/10/28 05:06:25
I din't think there is. Every access to the text
| |
448 return null; | |
449 int code = state._text.charCodeAt(pos); | |
450 if (_predicate(code)) | |
451 return [pos + 1, null]; | |
452 return null; | |
453 } | |
454 | |
455 toString() => _name == null ? 'CHARCODE($_predicate)' : 'CHARCODE($_name)'; | |
456 } | |
457 | |
458 class _AnyCharRule extends _Rule { | |
459 _match(_ParserState state, int pos) { | |
460 if (pos == state._end) | |
461 return null; | |
462 return [pos + 1, null]; | |
463 } | |
464 | |
465 toString() => 'CHAR()'; | |
466 } | |
467 | |
468 class _SymbolRule extends _Rule { | |
469 final Symbol _symbol; | |
470 _SymbolRule(Symbol this._symbol); | |
471 _match(_ParserState state, int pos) { | |
472 if (_symbol._rule == null) | |
473 throw new Exception("Symbol '${_symbol.name}' is undefined"); | |
474 return _symbol._rule.match(state, pos); | |
475 } | |
476 | |
477 bool get generatesValue() => true; | |
478 | |
479 toString() => '<${_symbol.name}>'; | |
480 } | |
481 | |
482 class _SkipRule extends _Rule { | |
483 // A rule that has no value. | |
484 _Rule _rule; | |
485 _SkipRule(_Rule this._rule); | |
486 _match(_ParserState state, int pos) { | |
487 var match = _rule.matchAfterWS(state, pos); | |
488 if (match == null) | |
489 return null; | |
490 return [match[0], null]; | |
491 } | |
492 | |
493 toString() => 'TOKEN($_rule)'; | |
494 } | |
495 | |
496 class _StringRule extends _Rule implements _Expectable { | |
497 final String _string; | |
498 int _len; | |
499 _StringRule(this._string) { | |
500 _len = _string.length; | |
501 } | |
502 | |
503 _match(_ParserState state, int pos) { | |
504 if (pos + _len > state._end) | |
Jennifer Messerly
2011/10/28 01:21:10
should be >=, to match a string at the end ?
sra1
2011/10/28 05:06:25
It is correct. Normally you check pos < end. But
| |
505 return null; | |
506 for (int i = 0; i < _len; i++) { | |
507 if (state._text.charCodeAt(pos + i) != _string.charCodeAt(i)) | |
508 return null; | |
509 } | |
510 return [pos + _len, null]; | |
511 } | |
512 | |
513 //get defaultValue() => _string; | |
514 | |
515 toString() => '"$_string"'; | |
516 | |
517 description() => "'$_string'"; | |
518 } | |
519 | |
520 class _RegExpRule extends _Rule { | |
521 RegExp _re; | |
522 _RegExpRule(this._re) { | |
523 // There is no convenient way to match an anchored substring. | |
524 throw new Exception('RegExp matching not supported'); | |
525 } | |
526 | |
527 toString() => '"$_re"'; | |
528 } | |
529 | |
530 class _LexicalRule extends _Rule implements _Expectable { | |
531 final String _name; | |
532 final _Rule _rule; | |
533 | |
534 _LexicalRule(String this._name, _Rule this._rule); | |
535 | |
536 _match(_ParserState state, int pos) { | |
537 state.in_whitespace_mode = true; | |
538 state.inhibitExpectedTrackingDepth++; | |
539 var match = _rule.matchAfterWS(state, pos); | |
540 state.inhibitExpectedTrackingDepth--; | |
541 state.in_whitespace_mode = false; | |
542 return match; | |
543 } | |
544 | |
545 toString() => _name; | |
546 | |
547 description() => _name == null ? '?' : _name; | |
548 } | |
549 | |
550 class _TextValueRule extends _Rule { | |
551 final _Rule _rule; | |
552 final _extract; // Function | |
553 | |
554 _TextValueRule(_Rule this._rule, Function this._extract); | |
555 | |
556 _match(_ParserState state, int pos) { | |
557 var match = _rule.matchAfterWS(state, pos); | |
558 if (match == null) { | |
559 return null; | |
560 } | |
561 var endPos = match[0]; | |
562 return [endPos, _extract(state._text, pos, endPos)]; | |
563 } | |
564 | |
565 bool get generatesValue() => true; | |
566 | |
567 toString() => 'TEXT($_rule)'; | |
568 } | |
569 | |
570 _Rule _compileMultiRule(List rules, | |
571 bool allowReducer, | |
572 finish(compiledRules, valueCount, reducer)) { | |
573 int valueCount = 0; | |
574 List compiledRules = new List<_Rule>(); | |
575 Function reducer; | |
576 for (var rule in rules) { | |
577 if (reducer != null) | |
578 throw new Exception('Reducer must be last in sequence: $rule'); | |
579 if (rule is Function) { | |
580 if (allowReducer) | |
581 reducer = rule; | |
582 else | |
583 throw new Exception('Bad rule: "$rule"'); | |
584 } else { | |
585 _Rule compiledRule = _compile(rule); | |
586 if (compiledRule.generatesValue) | |
587 ++valueCount; | |
588 compiledRules.add(compiledRule); | |
589 } | |
590 } | |
591 return finish(compiledRules, valueCount, reducer); | |
592 } | |
593 | |
594 String _formatMultiRule(String functor, List rules) { | |
595 var sb = new StringBuffer(functor); | |
596 sb.add('('); | |
597 var separator = ''; | |
598 for (var rule in rules) { | |
599 sb.add(separator); | |
600 sb.add(rule); | |
601 separator = ','; | |
602 } | |
603 sb.add(')'); | |
604 return sb.toString(); | |
605 } | |
606 | |
607 class _SequenceRule extends _Rule { | |
608 // This rule matches the component rules in order. | |
609 final List<_Rule> _rules; | |
610 final int _generatingSubRules = 0; | |
611 final Function _reducer; | |
612 bool _generatesValue; | |
613 _SequenceRule(List<_Rule> this._rules, | |
614 int this._generatingSubRules, | |
615 Function this._reducer) { | |
616 _generatesValue = _generatingSubRules > 0 || _reducer != null; | |
617 } | |
618 | |
619 _match(state, pos) { | |
620 var sequence = []; | |
621 for (var rule in _rules) { | |
622 var match = rule.match(state, pos); | |
623 if (match == null) | |
624 return null; | |
625 if (rule.generatesValue) { | |
626 var ast = match[1]; | |
627 sequence.add(ast); | |
628 } | |
629 pos = match[0]; | |
630 } | |
631 if (_reducer == null) { | |
632 if (_generatingSubRules == 0) | |
633 return [pos, null]; | |
634 if (_generatingSubRules == 1) | |
635 return [pos, sequence[0]]; | |
636 return [pos, sequence]; | |
637 } else { | |
638 return [pos, _apply(_reducer, sequence)]; | |
639 } | |
640 } | |
641 | |
642 bool get generatesValue() => _generatesValue; | |
643 | |
644 toString() => _formatMultiRule('SEQ', _rules); | |
645 } | |
646 | |
647 class _ChoiceRule extends _Rule { | |
648 // This rule matches the first component rule that matches. | |
649 List<_Rule> _rules; | |
650 _ChoiceRule(List<_Rule> this._rules); | |
651 | |
652 _match(state, pos) { | |
653 for (var rule in _rules) { | |
654 var match = rule.match(state, pos); | |
655 if (match != null) { | |
656 /* | |
657 if (!rule.generatesValue) { | |
658 var value = rule.defaultValue; | |
659 if (value != null) | |
660 return [match[0], value]; | |
661 } | |
662 */ | |
663 return match; | |
664 } | |
665 } | |
666 return null; | |
667 } | |
668 | |
669 bool get generatesValue() => true; | |
670 | |
671 toString() => _formatMultiRule('OR', _rules); | |
672 } | |
673 | |
674 class _OptionalRule extends _Rule { | |
675 _Rule _rule; | |
676 _OptionalRule(_Rule this._rule); | |
677 _match(_ParserState state, int pos) { | |
678 var match = _rule.match(state, pos); | |
679 if (_rule.generatesValue) | |
680 if (match == null) | |
Jennifer Messerly
2011/10/28 01:21:10
I often write this as:
return (match == null) ? [p
sra1
2011/10/28 05:06:25
I'm not a fan of ?:
I prefer if-then-else as an ex
| |
681 return [pos, null]; | |
682 else | |
683 return match; | |
684 if (match == null) | |
685 return [pos, false]; | |
686 else | |
687 return [match[0], true]; | |
688 } | |
689 | |
690 bool get generatesValue() => true; | |
691 | |
692 toString() => 'MAYBE($_rule)'; | |
693 } | |
694 | |
695 class _ContextRule extends _Rule { | |
696 _Rule _rule; | |
697 _ContextRule(_Rule this._rule); | |
698 _match(_ParserState state, int pos) { | |
699 // TODO: protect error state. | |
700 var match = _rule._match(state, pos); | |
701 if (match == null) | |
702 return null; | |
703 return [pos, null]; | |
704 } | |
705 | |
706 toString() => 'AT($_rule)'; | |
707 } | |
708 | |
709 class _NegativeContextRule extends _Rule { | |
710 _Rule _rule; | |
711 _NegativeContextRule(_Rule this._rule); | |
712 _match(_ParserState state, int pos) { | |
713 // TODO: protect error state. | |
714 var match = _rule._match(state, pos); | |
715 if (match == null) | |
716 return [pos, null]; | |
717 return null; | |
718 } | |
719 | |
720 toString() => 'NOT($_rule)'; | |
721 } | |
722 | |
723 class _RepeatRule extends _Rule { | |
724 // Matches zero, one or more items. | |
725 _Rule _rule; | |
726 _Rule _separator; | |
727 int _min; | |
728 | |
729 _RepeatRule(this._rule, this._separator, this._min); | |
730 | |
731 _match(state, pos) { | |
732 // First match. | |
733 var match = _rule.match(state, pos); | |
734 if (match == null) | |
735 if (_min == 0) | |
736 return [pos, []]; | |
737 else | |
738 return null; | |
739 pos = match[0]; | |
740 var result = [match[1]]; | |
741 | |
742 // Subsequent matches: | |
743 while (true) { | |
744 var newPos = pos; | |
745 if (_separator != null) { | |
746 match = _separator.match(state, pos); | |
747 if (match == null) | |
748 return [pos, result]; | |
749 newPos = match[0]; | |
750 } | |
751 match = _rule.match(state, newPos); | |
752 if (match == null) | |
753 return [pos, result]; | |
754 pos = match[0]; | |
755 result.add(match[1]); | |
756 } | |
757 } | |
758 | |
759 bool get generatesValue() => true; | |
760 | |
761 toString() => 'MANY(min:$_min, $_rule${_separator==null?'':", sep: $_separator "})'; | |
762 } | |
763 | |
764 class _MemoRule extends _Rule { | |
765 final _Rule _rule; | |
766 | |
767 var parseInstance; | |
768 | |
769 // A map from position to result. Can this be replaced with something | |
770 // smaller? | |
771 // TODO: figure out how to discard the map and parseInstance after parsing. | |
Jennifer Messerly
2011/10/28 01:21:10
Well, normally you'd have the memoization be per-r
| |
772 Map<int,Object> map; | |
773 | |
774 _MemoRule(this._rule); | |
775 | |
776 _match(state, pos) { | |
777 // See if we are still parsing the same input. Relies on the fact that the | |
778 // input is a string ans string are immutable. | |
779 if (parseInstance !== state._text) { | |
780 map = new Map<int,Object>(); | |
781 parseInstance = state._text; | |
782 } | |
783 // TODO: does this have to check or preserve parse state (like | |
784 // in_whitespace_mode, error position info etc?) | |
785 if (map.containsKey(pos)) { | |
786 //print('Found $pos $_rule => ${map[pos][0]}'); | |
787 return map[pos]; | |
Jennifer Messerly
2011/10/28 01:21:10
btw: instead of doing two lookups via "map.contain
sra1
2011/10/28 05:06:25
But null can be stored in the map!
| |
788 } | |
789 var match = _rule.match(state, pos); | |
790 map[pos] = match; | |
791 return match; | |
792 } | |
793 | |
794 bool get generatesValue() => _rule.generatesValue; | |
795 | |
796 toString() => 'MEMO($_rule)'; | |
797 } | |
798 | |
799 _apply(fn, List args) { | |
800 switch (args.length) { | |
801 case 0: return fn(); | |
802 case 1: return fn(args[0]); | |
803 case 2: return fn(args[0], args[1]); | |
804 case 3: return fn(args[0], args[1], args[2]); | |
805 case 4: return fn(args[0], args[1], args[2], args[3]); | |
806 case 5: return fn(args[0], args[1], args[2], args[3], args[4]); | |
807 case 6: return fn(args[0], args[1], args[2], args[3], args[4], | |
808 args[5]); | |
809 case 7: return fn(args[0], args[1], args[2], args[3], args[4], | |
810 args[5], args[6]); | |
811 case 8: return fn(args[0], args[1], args[2], args[3], args[4], | |
812 args[5], args[6], args[7]); | |
813 case 9: return fn(args[0], args[1], args[2], args[3], args[4], | |
814 args[5], args[6], args[7], args[8]); | |
815 case 10: return fn(args[0], args[1], args[2], args[3], args[4], | |
816 args[5], args[6], args[7], args[8], args[9]); | |
817 | |
818 default: | |
819 throw new Exception('Too many arguments in _apply: $args'); | |
820 } | |
821 } | |
822 | |
823 List _unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z) { | |
824 List list = new List(); | |
825 add(element) { if (element != null) list.add(element); } | |
826 add(a); | |
827 add(b); | |
828 add(c); | |
829 add(d); | |
830 add(e); | |
831 add(f); | |
832 add(g); | |
833 add(h); | |
834 add(i); | |
835 add(j); | |
836 add(k); | |
837 add(l); | |
838 add(m); | |
839 add(n); | |
840 add(o); | |
841 add(p); | |
842 add(q); | |
843 add(r); | |
844 add(s); | |
845 add(t); | |
846 add(u); | |
847 add(v); | |
848 add(w); | |
849 add(x); | |
850 add(y); | |
851 add(z); | |
852 return list; | |
853 } | |
OLD | NEW |