Chromium Code Reviews| Index: utils/peg/pegparser.dart |
| diff --git a/utils/peg/pegparser.dart b/utils/peg/pegparser.dart |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..0cfd564fe1bdba80873223a5685b65fb92863cd9 |
| --- /dev/null |
| +++ b/utils/peg/pegparser.dart |
| @@ -0,0 +1,853 @@ |
| +#library('Peg Parser'); |
|
Jennifer Messerly
2011/10/28 01:21:10
Do they intend to allow spaces in the name? (I'm n
sra1
2011/10/28 05:06:25
My working hypothesis is if it was meant to be an
|
| + |
| +/* |
| + * The following functions are combinators for building Rules. |
|
Jennifer Messerly
2011/10/28 01:21:10
awesome comment!
|
| + * |
| + * A rule is one of the following |
| + * - A String which matches the string literally. |
| + * - A Symbol which matches the symbol's definition. |
| + * - A list of rules with an optional reducing function, which matches a sequence. |
| + * - The result of calling one of the combinators. |
| + * |
| + * Some rules are 'value-generating' rules, they return an 'abstract syntax |
| + * tree' with the match. If a rule is not value-generating [:null:] is the |
| + * value. |
| + * |
| + * A Symbol is always a value-generating rule. If the value is not required, use |
| + * [:SKIP(aSymbol):] in place of [:aSymbol:]. |
| + * |
| + * A String is not a value-generating rule but can be converted into one by |
| + * using [:TEXT('string'):] in place of [:'string':]. |
| + * |
| + * A list or sequence is value-generating depending on the subrules. The |
| + * sequence is value-generating if any of the subrules are value-generating or |
| + * if there is a reducing function. If no reducing function is given, the value |
| + * returned depends on the number of value-generating subrules. If there is |
| + * only one value generating subrule, that provideds the value for the sequence. |
| + * If there are more, then the value is a list of the values of the |
| + * value-generating subrules. |
| + */ |
| + |
| +/** |
| + * Matches one character by a predicate on the character code. |
| + * If [spec] is an int, that character is matched. |
| + * If [spec] is a function it is used |
| + * |
| + * Example [: CHARCODE((code) => 48 <= code && code <= 57) :] recognizes an |
| + * ASCII digit. |
| + * |
| + * CHARCODE does not generate a value. |
| + */ |
| +_Rule CHARCODE(spec, [name]) { |
| + if (spec is int) |
|
Jennifer Messerly
2011/10/28 01:21:10
I hesitate to suggest style for someone else's cod
sra1
2011/10/28 05:06:25
I'm experimenting. cpp style does not actually say
|
| + return new _CharCodeRule((code) => code == spec, name); |
| + else |
| + return new _CharCodeRule(spec, name); |
| +} |
| + |
| +/** |
| + * Matches one of the [characters]. |
| + * |
| + * CHAR does not generate a value. |
| + */ |
| +_Rule CHAR([characters]) { |
| + if (characters == null) |
| + return new _AnyCharRule(); |
|
Jennifer Messerly
2011/10/28 01:21:10
const?
sra1
2011/10/28 05:06:25
Done.
|
| + if (characters is int) |
| + return CHARCODE(characters); |
| + |
| + // Find the range of character codes and construct an array of flags for codes |
| + // within the range. |
| + List<int> codes = characters.charCodes(); |
| + codes.sort((a, b) => a < b ? -1 : a > b ? 1 : 0); |
|
Jennifer Messerly
2011/10/28 01:21:10
codes.sort((a, b) => a.compareTo(b));
...since num
sra1
2011/10/28 05:06:25
I could, but the Frog code is kind of disgusting.
|
| + int lo = codes[0]; |
| + int hi = codes[codes.length - 1]; |
| + if (lo == hi) |
| + return CHARCODE(lo); |
| + int len = hi - lo + 1; |
| + List<bool> flags = new List<bool>(len); |
|
Jennifer Messerly
2011/10/28 01:21:10
style nit: repeated type name
sra1
2011/10/28 05:06:25
Done.
|
| + for (int i = 0; i < len; ++i) |
| + flags[i] = false; |
|
Jennifer Messerly
2011/10/28 01:21:10
I wonder if we should have a BitList class somewhe
sra1
2011/10/28 05:06:25
The bitlist would probably be slower in access too
|
| + for (int code in codes) |
| + flags[code - lo] = true; |
| + |
| + return CHARCODE((code) => code >= lo && code <= hi && flags[code - lo]); |
| +} |
| + |
| +/** |
| + * Matches the end of the input. |
| + * |
| + * END does not generate a value. |
| + */ |
| +_Rule get END() => new _EndOfInputRule(); |
| + |
| +/** |
| + * Throws an exception. |
| + */ |
| +_Rule ERROR(String message) => new _ErrorRule(message); |
| + |
| +/** |
| + * Matches [rule] but does not consume the input. Useful for matching a right |
| + * context. |
| + * |
| + * AT does not generate a value. |
| + */ |
| +_Rule AT(rule) => new _ContextRule(_compile(rule)); |
| + |
| +/** |
| + * Matches when [rule] does not match. No input is consumed. |
| + * |
| + * NOT does not generate a value. |
| + */ |
| +_Rule NOT(rule) => new _NegativeContextRule(_compile(rule)); |
| + |
| +/** |
| + * Matches [rule] but generates no value even if [rule] generates a value. |
| + * |
| + * SKIP never generates a value. |
| + */ |
| +_Rule SKIP(rule) => new _SkipRule(_compile(rule)); |
| + |
| +/** |
| + * Matches [rule] in a lexical context where whitespace is not automatically |
| + * skipped. Useful for matching what would normally be considered to be tokens. |
| + * [name] is a user-friendly description of what is being matched and is used in |
| + * error messages. |
| + * |
| + * LEX(rule) |
| + * LEX(name, rule) |
| + * |
| + * LEX does not generate a value. If a value is required, wrap LEX with TEXT. |
| + */ |
| +_Rule LEX(arg1, [arg2]) { |
| + if (arg2 == null) |
| + return new _LexicalRule(arg1 is String ? arg1 : null, _compile(arg1)); |
| + else |
| + return new _LexicalRule(arg1, _compile(arg2)); |
| +} |
| + |
| +/** |
| + * Matches [rule] and generates a value from the matched text. If the [rule] |
| + * matches, then TEXT(rule) matches and has a value derived from the string |
| + * fragment that was matched. The default derived value is the string fragment. |
| + * |
| + * TEXT always generates a value. |
| + */ |
| +_Rule TEXT(rule, [extractor]) => |
| + new _TextValueRule(_compile(rule), |
| + extractor == null |
| + ? (string, start, end) => string.substring(start, end) |
| + : extractor); |
| + |
| +/** |
| + * Matches an optional rule. |
| + * |
| + * MAYBE is a value generating matcher. |
| + * |
| + * If [rule] is value generating then the value is the value generated by [rule] |
| + * if it matches, and [:null:] if it does not. |
| + * |
| + * If [rule] is not value generatinge then the value is [:true:] if [rule] |
| + * matches and [:false:] if it does not. |
| + */ |
| +_Rule MAYBE(rule) => new _OptionalRule(_compile(rule)); |
| + |
| +/** |
| + * MANY(rule) matches [rule] [min] or more times. |
| + * [min] must be 0 or 1. |
| + * If [separator] is provided it is used to match a separator between matches of |
| + * [rule]. |
| + * |
| + * MANY is a value generating matcher. The value is a list of the matches of |
| + * [rule]. The list may be empty if [:min == 0:]. |
| + */ |
| +_Rule MANY(rule, [separator = null, int min = 1]) { |
| + assert(0 <= min && min <= 1); |
| + return new _RepeatRule(_compile(rule), _compile_optional(separator), min); |
| +} |
| + |
| +/** |
| + * Matches [rule] zero or more times. Shorthand for [:MANY(rule, min:0):] |
| + * TODO: retire min: parameter? |
| + * |
| + * MANY0 is a value generating matcher. |
| + */ |
| +_Rule MANY0(rule, [separator = null]) { |
| + return new _RepeatRule(_compile(rule), _compile_optional(separator), 0); |
| +} |
| + |
| +/** |
| + * Matches [rules] in order until one succeeds. |
| + * |
| + * OR is value-generating. |
| + */ |
| +_Rule OR([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]) => |
|
Jennifer Messerly
2011/10/28 01:21:10
something tells me you wanted spread/rest args :)
sra1
2011/10/28 05:06:25
Yes, it looks bad because [ means both sequence an
|
| + _compileMultiRule( |
| + (a is List && b == null) // Backward compat. OR([a, b]) => OR(a, b). |
| + ? a |
| + : _unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z), |
| + false, |
| + (compiledRules, valueCount, reducer) => |
| + new _ChoiceRule(compiledRules)); |
| + |
| + |
| + |
| +_Rule SEQ([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]) => |
| + _compile(_unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z)); |
| + |
| +/** |
| + * Matches [rule] |
| + */ |
| +_Rule MEMO(rule) => new _MemoRule(_compile(rule)); |
| + |
| +_Rule TAG(tag, rule) => _compile([rule, (ast) => [tag, ast]]); |
| + |
| + |
| +class ParseError implements Exception { |
| + const ParseError(String this._message); |
| + String toString() => _message; |
| + final String _message; |
| +} |
| + |
| +/** |
| + * A grammar is a collection of symbols and rules that may be used to parse an |
| + * input. |
| + */ |
| +class Grammar { |
|
Jennifer Messerly
2011/10/28 01:21:10
In theory, you could get by without this class? It
sra1
2011/10/28 05:06:25
In a different implementation the Grammar would be
|
| + Map<String, Symbol> _symbols; |
| + |
| + /** This rule may be set by the user to define whitespace. */ |
| + _Rule _whitespace; |
| + |
| + _Rule get whitespace() => _whitespace; |
| + void set whitespace(rule) { _whitespace = _compile(rule); } |
| + |
| + Grammar() { |
| + _symbols = new Map<String, Symbol>(); |
| + //whitespace = OR([' ', '\t', '\r', '\n']); |
|
Jennifer Messerly
2011/10/28 01:21:10
remove?
sra1
2011/10/28 05:06:25
Done.
|
| + this.whitespace = CHAR(' \t\r\n'); |
|
Jennifer Messerly
2011/10/28 01:21:10
shouldn't need "this." anymore now that the bug is
sra1
2011/10/28 05:06:25
Done.
|
| + } |
| + |
| + /** |
| + * operator [] is used to find or create symbols. Symbols may appear in rules |
| + * to define recursive rules. |
| + */ |
| + Symbol operator [](String name) { |
|
Jennifer Messerly
2011/10/28 01:21:10
Could define a setter too that would set the Symbo
sra1
2011/10/28 05:06:25
The whole thing with having to define symbols and
|
| + if (_symbols.containsKey(name)) |
| + return _symbols[name]; |
| + Symbol s = new Symbol(name, this); |
| + _symbols[name] = s; |
| + return s; |
| + } |
| + |
| + /** |
| + * Parses the input string and returns the parsed AST, or throws an exception |
| + * if the input can't be parsed. |
| + */ |
| + parse(root, String text) { |
| + for (var symbol in _symbols.getValues()) |
|
Jennifer Messerly
2011/10/28 01:21:10
style nit: curlies
|
| + if (symbol._rule == null) |
| + print('${symbol.name} is undefined'); |
| + |
| + var state = new _ParserState(text, whitespace: whitespace); |
| + var match = _compile(root).match(state, 0); |
| + if (match == null) |
| + return diagnose(state); |
| + var pos = match[0]; |
| + pos = _skip_whitespace(state, pos); |
| + if (pos == state._end) |
| + return match[1]; |
| + return diagnose(state); |
| + } |
| + |
| + diagnose(state) { |
| + // TODO: locate error and describe. |
|
Jennifer Messerly
2011/10/28 01:21:10
ah, error messages, the hard part of Packrat parse
|
| + print('Parse failed at position ${state.max_pos}'); |
| + var message = 'unexpected error'; |
| + if (state.max_rule.isEmpty()) { |
| + var s = new Set(); |
| + for (var rule in state.max_rule) |
|
Jennifer Messerly
2011/10/28 01:21:10
if we got here, isn't state.max_rule empty? should
sra1
2011/10/28 05:06:25
Good catch. I was wondering where the informative
|
| + s.add(rule.description()); |
| + var tokens = new List<String>.from(s); |
| + tokens.sort((a, b) => a.startsWith("'")==b.startsWith("'") ? a.compareTo(b) |
|
Jennifer Messerly
2011/10/28 01:21:10
style nit: long line, spaces etc
sra1
2011/10/28 05:06:25
Done.
|
| + : a.startsWith("'") ? +1 : -1); |
| + var expected = Strings.join(tokens, ' or '); |
| + var found = state.max_pos == state._end ? 'end of file' |
| + : "'${state._text[state.max_pos]}'"; |
| + message = 'Expected $expected but found $found'; |
| + } |
| + int start = state.max_pos; |
| + int end = start; |
| + while (start >= 1 && state._text[start - 1] != '\n') --start; |
| + while (end < state._text.length && state._text[end] != '\n') ++end; |
| + var line = state._text.substring(start, end); |
| + var indicator = ''; |
| + for (var i = 0; i < line.length && start + i < state.max_pos; i++) |
| + indicator = indicator + ' '; |
| + indicator = indicator + '^'; |
| + print(message); |
| + print(line); |
| + print(indicator); |
| + return null; |
| + } |
| +} |
| + |
| +class Symbol { |
| + final String name; |
| + final Grammar grammar; |
| + _Rule _rule; |
| + |
| + Symbol(this.name, this.grammar); |
| + |
| + void set def(rule) { |
| + assert(_rule == null); // Assign once. |
| + _rule = _compile(rule); |
| + } |
| + |
| + toString() => _rule == null ? '<$name>' : '<$name = $_rule>'; |
| +} |
| + |
| + |
| +class _ParserState { |
| + _ParserState(this._text, [_Rule whitespace = null]) { |
| + _end = this._text.length; |
| + whitespace_rule = whitespace; |
| + max_rule = []; |
| + } |
| + |
| + String _text; |
| + int _end; |
| + |
| + // |
| + bool in_whitespace_mode = false; |
|
Jennifer Messerly
2011/10/28 01:21:10
style nit: camelCase
sra1
2011/10/28 05:06:25
Done.
|
| + _Rule whitespace_rule = null; |
| + |
| + // Used for constructing an error message. |
| + int inhibitExpectedTrackingDepth = 0; |
| + int max_pos = 0; |
| + var max_rule; |
| +} |
| + |
| +/** |
| + * An interface tag for rules. If this tag is on a rule, then the description() |
| + * of the rule is something sensible to put in a message. |
| + */ |
| +interface _Expectable { |
| + String description(); |
| +} |
| + |
| +class _Rule { |
| + // Returns null for a match failure or [pos, ast] for success. |
| + match(_ParserState state, int pos) { |
| + if (! state.in_whitespace_mode) { |
| + pos = _skip_whitespace(state, pos); |
| + } |
| + return matchAfterWS(state, pos); |
| + } |
| + |
| + // Faster entry point for matching a sub-rule that is matched to the start |
| + // position of the super-rule. Whitespace has already been skipped so no need |
| + // to try to skip it again. |
| + matchAfterWS(_ParserState state, int pos) { |
| + if (state.inhibitExpectedTrackingDepth == 0) { |
| + // Track position for possible error messaging |
| + if (pos > state.max_pos) { |
| + // Store position and the rule. |
| + state.max_pos = pos; |
| + if (this is _Expectable) { |
| + state.max_rule = [this]; |
| + } else { |
| + state.max_rule = []; |
| + } |
| + } else if (pos == state.max_pos) { |
| + if (this is _Expectable) { |
| + state.max_rule.add(this); |
| + } |
| + } |
| + } |
| + // Delegate the matching logic to the the specialized function. |
| + return _match(state, pos); |
| + } |
| + |
| + // Overridden in subclasses to match the rule. |
| + _match(_ParserState state, int pos) => null; |
| + |
| + // Does the rule generate a value (AST) with the match? |
| + bool get generatesValue() => false; |
| + |
| + get defaultValue() => null; |
| +} |
| + |
| +int _skip_whitespace(state, pos) { |
| + // Returns the next non-whitespace position. |
| + // This is done by matching the optional whitespace_rule with the current text. |
| + if (state.whitespace_rule == null) |
| + return pos; |
| + state.in_whitespace_mode = true; |
| + state.inhibitExpectedTrackingDepth++; |
| + while (true) { |
| + var match = state.whitespace_rule.match(state, pos); |
| + if (match == null) |
| + break; |
| + pos = match[0]; |
| + } |
| + state.in_whitespace_mode = false; |
| + state.inhibitExpectedTrackingDepth--; |
| + return pos; |
| +} |
| + |
| + |
| +_Rule _compile_optional(rule) { |
|
Jennifer Messerly
2011/10/28 01:21:10
why not put this check in _compile?
style nit: nam
sra1
2011/10/28 05:06:25
It gives the user better errors if null throws at
|
| + return rule == null ? null : _compile(rule); |
| +} |
| + |
| +_Rule _compile(rule) { |
|
Jennifer Messerly
2011/10/28 01:21:10
I found the name "compile" a bit confusing here. N
sra1
2011/10/28 05:06:25
Me neither.
|
| + if (rule is _Rule) |
| + return rule; |
| + if (rule is String) |
| + return new _StringRule(rule); |
| + if (rule is Symbol) |
| + return new _SymbolRule(rule); |
| + if (rule is RegExp) |
| + return new _RegExpRule(rule); |
| + if (rule is List) { |
| + return _compileMultiRule( |
| + rule, true, |
| + (compiledRules, valueCount, reducer) => |
| + new _SequenceRule(compiledRules, valueCount, reducer)); |
| + } |
| + throw new Exception('Cannot compile rule: $rule'); |
| +} |
| + |
| +class _EndOfInputRule extends _Rule { |
| + _match(_ParserState state, int pos) { |
| + if (pos == state._end) |
| + return [pos, null]; |
| + return null; |
| + } |
| + |
| + toString() => 'END'; |
| +} |
| + |
| +class _ErrorRule extends _Rule { |
| + String message; |
| + _ErrorRule(String this.message); |
| + _match(_ParserState state, int pos) { |
| + throw new ParseError(message); |
|
Jennifer Messerly
2011/10/28 01:21:10
include position?
sra1
2011/10/28 05:06:25
The position needs to be converted into something
|
| + } |
| + |
| + toString() => 'ERROR($message)'; |
| +} |
| + |
| +class _CharCodeRule extends _Rule { |
| + Function _predicate; |
| + var _name; |
| + _CharCodeRule(Function this._predicate, this._name); |
|
Jennifer Messerly
2011/10/28 01:21:10
don't need to repeat the type "Function" here.
|
| + _match(_ParserState state, int pos) { |
| + if (pos == state._end) |
|
Jennifer Messerly
2011/10/28 01:21:10
I wonder if there's a way to do this without check
sra1
2011/10/28 05:06:25
I din't think there is. Every access to the text
|
| + return null; |
| + int code = state._text.charCodeAt(pos); |
| + if (_predicate(code)) |
| + return [pos + 1, null]; |
| + return null; |
| + } |
| + |
| + toString() => _name == null ? 'CHARCODE($_predicate)' : 'CHARCODE($_name)'; |
| +} |
| + |
| +class _AnyCharRule extends _Rule { |
| + _match(_ParserState state, int pos) { |
| + if (pos == state._end) |
| + return null; |
| + return [pos + 1, null]; |
| + } |
| + |
| + toString() => 'CHAR()'; |
| +} |
| + |
| +class _SymbolRule extends _Rule { |
| + final Symbol _symbol; |
| + _SymbolRule(Symbol this._symbol); |
| + _match(_ParserState state, int pos) { |
| + if (_symbol._rule == null) |
| + throw new Exception("Symbol '${_symbol.name}' is undefined"); |
| + return _symbol._rule.match(state, pos); |
| + } |
| + |
| + bool get generatesValue() => true; |
| + |
| + toString() => '<${_symbol.name}>'; |
| +} |
| + |
| +class _SkipRule extends _Rule { |
| + // A rule that has no value. |
| + _Rule _rule; |
| + _SkipRule(_Rule this._rule); |
| + _match(_ParserState state, int pos) { |
| + var match = _rule.matchAfterWS(state, pos); |
| + if (match == null) |
| + return null; |
| + return [match[0], null]; |
| + } |
| + |
| + toString() => 'TOKEN($_rule)'; |
| +} |
| + |
| +class _StringRule extends _Rule implements _Expectable { |
| + final String _string; |
| + int _len; |
| + _StringRule(this._string) { |
| + _len = _string.length; |
| + } |
| + |
| + _match(_ParserState state, int pos) { |
| + if (pos + _len > state._end) |
|
Jennifer Messerly
2011/10/28 01:21:10
should be >=, to match a string at the end ?
sra1
2011/10/28 05:06:25
It is correct. Normally you check pos < end. But
|
| + return null; |
| + for (int i = 0; i < _len; i++) { |
| + if (state._text.charCodeAt(pos + i) != _string.charCodeAt(i)) |
| + return null; |
| + } |
| + return [pos + _len, null]; |
| + } |
| + |
| + //get defaultValue() => _string; |
| + |
| + toString() => '"$_string"'; |
| + |
| + description() => "'$_string'"; |
| +} |
| + |
| +class _RegExpRule extends _Rule { |
| + RegExp _re; |
| + _RegExpRule(this._re) { |
| + // There is no convenient way to match an anchored substring. |
| + throw new Exception('RegExp matching not supported'); |
| + } |
| + |
| + toString() => '"$_re"'; |
| +} |
| + |
| +class _LexicalRule extends _Rule implements _Expectable { |
| + final String _name; |
| + final _Rule _rule; |
| + |
| + _LexicalRule(String this._name, _Rule this._rule); |
| + |
| + _match(_ParserState state, int pos) { |
| + state.in_whitespace_mode = true; |
| + state.inhibitExpectedTrackingDepth++; |
| + var match = _rule.matchAfterWS(state, pos); |
| + state.inhibitExpectedTrackingDepth--; |
| + state.in_whitespace_mode = false; |
| + return match; |
| + } |
| + |
| + toString() => _name; |
| + |
| + description() => _name == null ? '?' : _name; |
| +} |
| + |
| +class _TextValueRule extends _Rule { |
| + final _Rule _rule; |
| + final _extract; // Function |
| + |
| + _TextValueRule(_Rule this._rule, Function this._extract); |
| + |
| + _match(_ParserState state, int pos) { |
| + var match = _rule.matchAfterWS(state, pos); |
| + if (match == null) { |
| + return null; |
| + } |
| + var endPos = match[0]; |
| + return [endPos, _extract(state._text, pos, endPos)]; |
| + } |
| + |
| + bool get generatesValue() => true; |
| + |
| + toString() => 'TEXT($_rule)'; |
| +} |
| + |
| +_Rule _compileMultiRule(List rules, |
| + bool allowReducer, |
| + finish(compiledRules, valueCount, reducer)) { |
| + int valueCount = 0; |
| + List compiledRules = new List<_Rule>(); |
| + Function reducer; |
| + for (var rule in rules) { |
| + if (reducer != null) |
| + throw new Exception('Reducer must be last in sequence: $rule'); |
| + if (rule is Function) { |
| + if (allowReducer) |
| + reducer = rule; |
| + else |
| + throw new Exception('Bad rule: "$rule"'); |
| + } else { |
| + _Rule compiledRule = _compile(rule); |
| + if (compiledRule.generatesValue) |
| + ++valueCount; |
| + compiledRules.add(compiledRule); |
| + } |
| + } |
| + return finish(compiledRules, valueCount, reducer); |
| +} |
| + |
| +String _formatMultiRule(String functor, List rules) { |
| + var sb = new StringBuffer(functor); |
| + sb.add('('); |
| + var separator = ''; |
| + for (var rule in rules) { |
| + sb.add(separator); |
| + sb.add(rule); |
| + separator = ','; |
| + } |
| + sb.add(')'); |
| + return sb.toString(); |
| +} |
| + |
| +class _SequenceRule extends _Rule { |
| + // This rule matches the component rules in order. |
| + final List<_Rule> _rules; |
| + final int _generatingSubRules = 0; |
| + final Function _reducer; |
| + bool _generatesValue; |
| + _SequenceRule(List<_Rule> this._rules, |
| + int this._generatingSubRules, |
| + Function this._reducer) { |
| + _generatesValue = _generatingSubRules > 0 || _reducer != null; |
| + } |
| + |
| + _match(state, pos) { |
| + var sequence = []; |
| + for (var rule in _rules) { |
| + var match = rule.match(state, pos); |
| + if (match == null) |
| + return null; |
| + if (rule.generatesValue) { |
| + var ast = match[1]; |
| + sequence.add(ast); |
| + } |
| + pos = match[0]; |
| + } |
| + if (_reducer == null) { |
| + if (_generatingSubRules == 0) |
| + return [pos, null]; |
| + if (_generatingSubRules == 1) |
| + return [pos, sequence[0]]; |
| + return [pos, sequence]; |
| + } else { |
| + return [pos, _apply(_reducer, sequence)]; |
| + } |
| + } |
| + |
| + bool get generatesValue() => _generatesValue; |
| + |
| + toString() => _formatMultiRule('SEQ', _rules); |
| +} |
| + |
| +class _ChoiceRule extends _Rule { |
| + // This rule matches the first component rule that matches. |
| + List<_Rule> _rules; |
| + _ChoiceRule(List<_Rule> this._rules); |
| + |
| + _match(state, pos) { |
| + for (var rule in _rules) { |
| + var match = rule.match(state, pos); |
| + if (match != null) { |
| + /* |
| + if (!rule.generatesValue) { |
| + var value = rule.defaultValue; |
| + if (value != null) |
| + return [match[0], value]; |
| + } |
| + */ |
| + return match; |
| + } |
| + } |
| + return null; |
| + } |
| + |
| + bool get generatesValue() => true; |
| + |
| + toString() => _formatMultiRule('OR', _rules); |
| +} |
| + |
| +class _OptionalRule extends _Rule { |
| + _Rule _rule; |
| + _OptionalRule(_Rule this._rule); |
| + _match(_ParserState state, int pos) { |
| + var match = _rule.match(state, pos); |
| + if (_rule.generatesValue) |
| + if (match == null) |
|
Jennifer Messerly
2011/10/28 01:21:10
I often write this as:
return (match == null) ? [p
sra1
2011/10/28 05:06:25
I'm not a fan of ?:
I prefer if-then-else as an ex
|
| + return [pos, null]; |
| + else |
| + return match; |
| + if (match == null) |
| + return [pos, false]; |
| + else |
| + return [match[0], true]; |
| + } |
| + |
| + bool get generatesValue() => true; |
| + |
| + toString() => 'MAYBE($_rule)'; |
| +} |
| + |
| +class _ContextRule extends _Rule { |
| + _Rule _rule; |
| + _ContextRule(_Rule this._rule); |
| + _match(_ParserState state, int pos) { |
| + // TODO: protect error state. |
| + var match = _rule._match(state, pos); |
| + if (match == null) |
| + return null; |
| + return [pos, null]; |
| + } |
| + |
| + toString() => 'AT($_rule)'; |
| +} |
| + |
| +class _NegativeContextRule extends _Rule { |
| + _Rule _rule; |
| + _NegativeContextRule(_Rule this._rule); |
| + _match(_ParserState state, int pos) { |
| + // TODO: protect error state. |
| + var match = _rule._match(state, pos); |
| + if (match == null) |
| + return [pos, null]; |
| + return null; |
| + } |
| + |
| + toString() => 'NOT($_rule)'; |
| +} |
| + |
| +class _RepeatRule extends _Rule { |
| + // Matches zero, one or more items. |
| + _Rule _rule; |
| + _Rule _separator; |
| + int _min; |
| + |
| + _RepeatRule(this._rule, this._separator, this._min); |
| + |
| + _match(state, pos) { |
| + // First match. |
| + var match = _rule.match(state, pos); |
| + if (match == null) |
| + if (_min == 0) |
| + return [pos, []]; |
| + else |
| + return null; |
| + pos = match[0]; |
| + var result = [match[1]]; |
| + |
| + // Subsequent matches: |
| + while (true) { |
| + var newPos = pos; |
| + if (_separator != null) { |
| + match = _separator.match(state, pos); |
| + if (match == null) |
| + return [pos, result]; |
| + newPos = match[0]; |
| + } |
| + match = _rule.match(state, newPos); |
| + if (match == null) |
| + return [pos, result]; |
| + pos = match[0]; |
| + result.add(match[1]); |
| + } |
| + } |
| + |
| + bool get generatesValue() => true; |
| + |
| + toString() => 'MANY(min:$_min, $_rule${_separator==null?'':", sep: $_separator"})'; |
| +} |
| + |
| +class _MemoRule extends _Rule { |
| + final _Rule _rule; |
| + |
| + var parseInstance; |
| + |
| + // A map from position to result. Can this be replaced with something |
| + // smaller? |
| + // TODO: figure out how to discard the map and parseInstance after parsing. |
|
Jennifer Messerly
2011/10/28 01:21:10
Well, normally you'd have the memoization be per-r
|
| + Map<int,Object> map; |
| + |
| + _MemoRule(this._rule); |
| + |
| + _match(state, pos) { |
| + // See if we are still parsing the same input. Relies on the fact that the |
| + // input is a string ans string are immutable. |
| + if (parseInstance !== state._text) { |
| + map = new Map<int,Object>(); |
| + parseInstance = state._text; |
| + } |
| + // TODO: does this have to check or preserve parse state (like |
| + // in_whitespace_mode, error position info etc?) |
| + if (map.containsKey(pos)) { |
| + //print('Found $pos $_rule => ${map[pos][0]}'); |
| + return map[pos]; |
|
Jennifer Messerly
2011/10/28 01:21:10
btw: instead of doing two lookups via "map.contain
sra1
2011/10/28 05:06:25
But null can be stored in the map!
|
| + } |
| + var match = _rule.match(state, pos); |
| + map[pos] = match; |
| + return match; |
| + } |
| + |
| + bool get generatesValue() => _rule.generatesValue; |
| + |
| + toString() => 'MEMO($_rule)'; |
| +} |
| + |
| +_apply(fn, List args) { |
| + switch (args.length) { |
| + case 0: return fn(); |
| + case 1: return fn(args[0]); |
| + case 2: return fn(args[0], args[1]); |
| + case 3: return fn(args[0], args[1], args[2]); |
| + case 4: return fn(args[0], args[1], args[2], args[3]); |
| + case 5: return fn(args[0], args[1], args[2], args[3], args[4]); |
| + case 6: return fn(args[0], args[1], args[2], args[3], args[4], |
| + args[5]); |
| + case 7: return fn(args[0], args[1], args[2], args[3], args[4], |
| + args[5], args[6]); |
| + case 8: return fn(args[0], args[1], args[2], args[3], args[4], |
| + args[5], args[6], args[7]); |
| + case 9: return fn(args[0], args[1], args[2], args[3], args[4], |
| + args[5], args[6], args[7], args[8]); |
| + case 10: return fn(args[0], args[1], args[2], args[3], args[4], |
| + args[5], args[6], args[7], args[8], args[9]); |
| + |
| + default: |
| + throw new Exception('Too many arguments in _apply: $args'); |
| + } |
| +} |
| + |
| +List _unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z) { |
| + List list = new List(); |
| + add(element) { if (element != null) list.add(element); } |
| + add(a); |
| + add(b); |
| + add(c); |
| + add(d); |
| + add(e); |
| + add(f); |
| + add(g); |
| + add(h); |
| + add(i); |
| + add(j); |
| + add(k); |
| + add(l); |
| + add(m); |
| + add(n); |
| + add(o); |
| + add(p); |
| + add(q); |
| + add(r); |
| + add(s); |
| + add(t); |
| + add(u); |
| + add(v); |
| + add(w); |
| + add(x); |
| + add(y); |
| + add(z); |
| + return list; |
| +} |