Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #library('Peg Parser'); | |
|
Jennifer Messerly
2011/10/28 01:21:10
Do they intend to allow spaces in the name? (I'm n
sra1
2011/10/28 05:06:25
My working hypothesis is if it was meant to be an
| |
| 2 | |
| 3 /* | |
| 4 * The following functions are combinators for building Rules. | |
|
Jennifer Messerly
2011/10/28 01:21:10
awesome comment!
| |
| 5 * | |
| 6 * A rule is one of the following | |
| 7 * - A String which matches the string literally. | |
| 8 * - A Symbol which matches the symbol's definition. | |
| 9 * - A list of rules with an optional reducing function, which matches a sequenc e. | |
| 10 * - The result of calling one of the combinators. | |
| 11 * | |
| 12 * Some rules are 'value-generating' rules, they return an 'abstract syntax | |
| 13 * tree' with the match. If a rule is not value-generating [:null:] is the | |
| 14 * value. | |
| 15 * | |
| 16 * A Symbol is always a value-generating rule. If the value is not required, use | |
| 17 * [:SKIP(aSymbol):] in place of [:aSymbol:]. | |
| 18 * | |
| 19 * A String is not a value-generating rule but can be converted into one by | |
| 20 * using [:TEXT('string'):] in place of [:'string':]. | |
| 21 * | |
| 22 * A list or sequence is value-generating depending on the subrules. The | |
| 23 * sequence is value-generating if any of the subrules are value-generating or | |
| 24 * if there is a reducing function. If no reducing function is given, the value | |
| 25 * returned depends on the number of value-generating subrules. If there is | |
| 26 * only one value generating subrule, that provideds the value for the sequence. | |
| 27 * If there are more, then the value is a list of the values of the | |
| 28 * value-generating subrules. | |
| 29 */ | |
| 30 | |
| 31 /** | |
| 32 * Matches one character by a predicate on the character code. | |
| 33 * If [spec] is an int, that character is matched. | |
| 34 * If [spec] is a function it is used | |
| 35 * | |
| 36 * Example [: CHARCODE((code) => 48 <= code && code <= 57) :] recognizes an | |
| 37 * ASCII digit. | |
| 38 * | |
| 39 * CHARCODE does not generate a value. | |
| 40 */ | |
| 41 _Rule CHARCODE(spec, [name]) { | |
| 42 if (spec is int) | |
|
Jennifer Messerly
2011/10/28 01:21:10
I hesitate to suggest style for someone else's cod
sra1
2011/10/28 05:06:25
I'm experimenting. cpp style does not actually say
| |
| 43 return new _CharCodeRule((code) => code == spec, name); | |
| 44 else | |
| 45 return new _CharCodeRule(spec, name); | |
| 46 } | |
| 47 | |
| 48 /** | |
| 49 * Matches one of the [characters]. | |
| 50 * | |
| 51 * CHAR does not generate a value. | |
| 52 */ | |
| 53 _Rule CHAR([characters]) { | |
| 54 if (characters == null) | |
| 55 return new _AnyCharRule(); | |
|
Jennifer Messerly
2011/10/28 01:21:10
const?
sra1
2011/10/28 05:06:25
Done.
| |
| 56 if (characters is int) | |
| 57 return CHARCODE(characters); | |
| 58 | |
| 59 // Find the range of character codes and construct an array of flags for codes | |
| 60 // within the range. | |
| 61 List<int> codes = characters.charCodes(); | |
| 62 codes.sort((a, b) => a < b ? -1 : a > b ? 1 : 0); | |
|
Jennifer Messerly
2011/10/28 01:21:10
codes.sort((a, b) => a.compareTo(b));
...since num
sra1
2011/10/28 05:06:25
I could, but the Frog code is kind of disgusting.
| |
| 63 int lo = codes[0]; | |
| 64 int hi = codes[codes.length - 1]; | |
| 65 if (lo == hi) | |
| 66 return CHARCODE(lo); | |
| 67 int len = hi - lo + 1; | |
| 68 List<bool> flags = new List<bool>(len); | |
|
Jennifer Messerly
2011/10/28 01:21:10
style nit: repeated type name
sra1
2011/10/28 05:06:25
Done.
| |
| 69 for (int i = 0; i < len; ++i) | |
| 70 flags[i] = false; | |
|
Jennifer Messerly
2011/10/28 01:21:10
I wonder if we should have a BitList class somewhe
sra1
2011/10/28 05:06:25
The bitlist would probably be slower in access too
| |
| 71 for (int code in codes) | |
| 72 flags[code - lo] = true; | |
| 73 | |
| 74 return CHARCODE((code) => code >= lo && code <= hi && flags[code - lo]); | |
| 75 } | |
| 76 | |
| 77 /** | |
| 78 * Matches the end of the input. | |
| 79 * | |
| 80 * END does not generate a value. | |
| 81 */ | |
| 82 _Rule get END() => new _EndOfInputRule(); | |
| 83 | |
| 84 /** | |
| 85 * Throws an exception. | |
| 86 */ | |
| 87 _Rule ERROR(String message) => new _ErrorRule(message); | |
| 88 | |
| 89 /** | |
| 90 * Matches [rule] but does not consume the input. Useful for matching a right | |
| 91 * context. | |
| 92 * | |
| 93 * AT does not generate a value. | |
| 94 */ | |
| 95 _Rule AT(rule) => new _ContextRule(_compile(rule)); | |
| 96 | |
| 97 /** | |
| 98 * Matches when [rule] does not match. No input is consumed. | |
| 99 * | |
| 100 * NOT does not generate a value. | |
| 101 */ | |
| 102 _Rule NOT(rule) => new _NegativeContextRule(_compile(rule)); | |
| 103 | |
| 104 /** | |
| 105 * Matches [rule] but generates no value even if [rule] generates a value. | |
| 106 * | |
| 107 * SKIP never generates a value. | |
| 108 */ | |
| 109 _Rule SKIP(rule) => new _SkipRule(_compile(rule)); | |
| 110 | |
| 111 /** | |
| 112 * Matches [rule] in a lexical context where whitespace is not automatically | |
| 113 * skipped. Useful for matching what would normally be considered to be tokens. | |
| 114 * [name] is a user-friendly description of what is being matched and is used in | |
| 115 * error messages. | |
| 116 * | |
| 117 * LEX(rule) | |
| 118 * LEX(name, rule) | |
| 119 * | |
| 120 * LEX does not generate a value. If a value is required, wrap LEX with TEXT. | |
| 121 */ | |
| 122 _Rule LEX(arg1, [arg2]) { | |
| 123 if (arg2 == null) | |
| 124 return new _LexicalRule(arg1 is String ? arg1 : null, _compile(arg1)); | |
| 125 else | |
| 126 return new _LexicalRule(arg1, _compile(arg2)); | |
| 127 } | |
| 128 | |
| 129 /** | |
| 130 * Matches [rule] and generates a value from the matched text. If the [rule] | |
| 131 * matches, then TEXT(rule) matches and has a value derived from the string | |
| 132 * fragment that was matched. The default derived value is the string fragment. | |
| 133 * | |
| 134 * TEXT always generates a value. | |
| 135 */ | |
| 136 _Rule TEXT(rule, [extractor]) => | |
| 137 new _TextValueRule(_compile(rule), | |
| 138 extractor == null | |
| 139 ? (string, start, end) => string.substring(start, end) | |
| 140 : extractor); | |
| 141 | |
| 142 /** | |
| 143 * Matches an optional rule. | |
| 144 * | |
| 145 * MAYBE is a value generating matcher. | |
| 146 * | |
| 147 * If [rule] is value generating then the value is the value generated by [rule] | |
| 148 * if it matches, and [:null:] if it does not. | |
| 149 * | |
| 150 * If [rule] is not value generatinge then the value is [:true:] if [rule] | |
| 151 * matches and [:false:] if it does not. | |
| 152 */ | |
| 153 _Rule MAYBE(rule) => new _OptionalRule(_compile(rule)); | |
| 154 | |
| 155 /** | |
| 156 * MANY(rule) matches [rule] [min] or more times. | |
| 157 * [min] must be 0 or 1. | |
| 158 * If [separator] is provided it is used to match a separator between matches of | |
| 159 * [rule]. | |
| 160 * | |
| 161 * MANY is a value generating matcher. The value is a list of the matches of | |
| 162 * [rule]. The list may be empty if [:min == 0:]. | |
| 163 */ | |
| 164 _Rule MANY(rule, [separator = null, int min = 1]) { | |
| 165 assert(0 <= min && min <= 1); | |
| 166 return new _RepeatRule(_compile(rule), _compile_optional(separator), min); | |
| 167 } | |
| 168 | |
| 169 /** | |
| 170 * Matches [rule] zero or more times. Shorthand for [:MANY(rule, min:0):] | |
| 171 * TODO: retire min: parameter? | |
| 172 * | |
| 173 * MANY0 is a value generating matcher. | |
| 174 */ | |
| 175 _Rule MANY0(rule, [separator = null]) { | |
| 176 return new _RepeatRule(_compile(rule), _compile_optional(separator), 0); | |
| 177 } | |
| 178 | |
| 179 /** | |
| 180 * Matches [rules] in order until one succeeds. | |
| 181 * | |
| 182 * OR is value-generating. | |
| 183 */ | |
| 184 _Rule OR([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]) => | |
|
Jennifer Messerly
2011/10/28 01:21:10
something tells me you wanted spread/rest args :)
sra1
2011/10/28 05:06:25
Yes, it looks bad because [ means both sequence an
| |
| 185 _compileMultiRule( | |
| 186 (a is List && b == null) // Backward compat. OR([a, b]) => OR(a, b). | |
| 187 ? a | |
| 188 : _unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z), | |
| 189 false, | |
| 190 (compiledRules, valueCount, reducer) => | |
| 191 new _ChoiceRule(compiledRules)); | |
| 192 | |
| 193 | |
| 194 | |
| 195 _Rule SEQ([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]) => | |
| 196 _compile(_unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z)); | |
| 197 | |
| 198 /** | |
| 199 * Matches [rule] | |
| 200 */ | |
| 201 _Rule MEMO(rule) => new _MemoRule(_compile(rule)); | |
| 202 | |
| 203 _Rule TAG(tag, rule) => _compile([rule, (ast) => [tag, ast]]); | |
| 204 | |
| 205 | |
| 206 class ParseError implements Exception { | |
| 207 const ParseError(String this._message); | |
| 208 String toString() => _message; | |
| 209 final String _message; | |
| 210 } | |
| 211 | |
| 212 /** | |
| 213 * A grammar is a collection of symbols and rules that may be used to parse an | |
| 214 * input. | |
| 215 */ | |
| 216 class Grammar { | |
|
Jennifer Messerly
2011/10/28 01:21:10
In theory, you could get by without this class? It
sra1
2011/10/28 05:06:25
In a different implementation the Grammar would be
| |
| 217 Map<String, Symbol> _symbols; | |
| 218 | |
| 219 /** This rule may be set by the user to define whitespace. */ | |
| 220 _Rule _whitespace; | |
| 221 | |
| 222 _Rule get whitespace() => _whitespace; | |
| 223 void set whitespace(rule) { _whitespace = _compile(rule); } | |
| 224 | |
| 225 Grammar() { | |
| 226 _symbols = new Map<String, Symbol>(); | |
| 227 //whitespace = OR([' ', '\t', '\r', '\n']); | |
|
Jennifer Messerly
2011/10/28 01:21:10
remove?
sra1
2011/10/28 05:06:25
Done.
| |
| 228 this.whitespace = CHAR(' \t\r\n'); | |
|
Jennifer Messerly
2011/10/28 01:21:10
shouldn't need "this." anymore now that the bug is
sra1
2011/10/28 05:06:25
Done.
| |
| 229 } | |
| 230 | |
| 231 /** | |
| 232 * operator [] is used to find or create symbols. Symbols may appear in rules | |
| 233 * to define recursive rules. | |
| 234 */ | |
| 235 Symbol operator [](String name) { | |
|
Jennifer Messerly
2011/10/28 01:21:10
Could define a setter too that would set the Symbo
sra1
2011/10/28 05:06:25
The whole thing with having to define symbols and
| |
| 236 if (_symbols.containsKey(name)) | |
| 237 return _symbols[name]; | |
| 238 Symbol s = new Symbol(name, this); | |
| 239 _symbols[name] = s; | |
| 240 return s; | |
| 241 } | |
| 242 | |
| 243 /** | |
| 244 * Parses the input string and returns the parsed AST, or throws an exception | |
| 245 * if the input can't be parsed. | |
| 246 */ | |
| 247 parse(root, String text) { | |
| 248 for (var symbol in _symbols.getValues()) | |
|
Jennifer Messerly
2011/10/28 01:21:10
style nit: curlies
| |
| 249 if (symbol._rule == null) | |
| 250 print('${symbol.name} is undefined'); | |
| 251 | |
| 252 var state = new _ParserState(text, whitespace: whitespace); | |
| 253 var match = _compile(root).match(state, 0); | |
| 254 if (match == null) | |
| 255 return diagnose(state); | |
| 256 var pos = match[0]; | |
| 257 pos = _skip_whitespace(state, pos); | |
| 258 if (pos == state._end) | |
| 259 return match[1]; | |
| 260 return diagnose(state); | |
| 261 } | |
| 262 | |
| 263 diagnose(state) { | |
| 264 // TODO: locate error and describe. | |
|
Jennifer Messerly
2011/10/28 01:21:10
ah, error messages, the hard part of Packrat parse
| |
| 265 print('Parse failed at position ${state.max_pos}'); | |
| 266 var message = 'unexpected error'; | |
| 267 if (state.max_rule.isEmpty()) { | |
| 268 var s = new Set(); | |
| 269 for (var rule in state.max_rule) | |
|
Jennifer Messerly
2011/10/28 01:21:10
if we got here, isn't state.max_rule empty? should
sra1
2011/10/28 05:06:25
Good catch. I was wondering where the informative
| |
| 270 s.add(rule.description()); | |
| 271 var tokens = new List<String>.from(s); | |
| 272 tokens.sort((a, b) => a.startsWith("'")==b.startsWith("'") ? a.compareTo(b ) | |
|
Jennifer Messerly
2011/10/28 01:21:10
style nit: long line, spaces etc
sra1
2011/10/28 05:06:25
Done.
| |
| 273 : a.startsWith("'") ? +1 : -1); | |
| 274 var expected = Strings.join(tokens, ' or '); | |
| 275 var found = state.max_pos == state._end ? 'end of file' | |
| 276 : "'${state._text[state.max_pos]}'"; | |
| 277 message = 'Expected $expected but found $found'; | |
| 278 } | |
| 279 int start = state.max_pos; | |
| 280 int end = start; | |
| 281 while (start >= 1 && state._text[start - 1] != '\n') --start; | |
| 282 while (end < state._text.length && state._text[end] != '\n') ++end; | |
| 283 var line = state._text.substring(start, end); | |
| 284 var indicator = ''; | |
| 285 for (var i = 0; i < line.length && start + i < state.max_pos; i++) | |
| 286 indicator = indicator + ' '; | |
| 287 indicator = indicator + '^'; | |
| 288 print(message); | |
| 289 print(line); | |
| 290 print(indicator); | |
| 291 return null; | |
| 292 } | |
| 293 } | |
| 294 | |
| 295 class Symbol { | |
| 296 final String name; | |
| 297 final Grammar grammar; | |
| 298 _Rule _rule; | |
| 299 | |
| 300 Symbol(this.name, this.grammar); | |
| 301 | |
| 302 void set def(rule) { | |
| 303 assert(_rule == null); // Assign once. | |
| 304 _rule = _compile(rule); | |
| 305 } | |
| 306 | |
| 307 toString() => _rule == null ? '<$name>' : '<$name = $_rule>'; | |
| 308 } | |
| 309 | |
| 310 | |
| 311 class _ParserState { | |
| 312 _ParserState(this._text, [_Rule whitespace = null]) { | |
| 313 _end = this._text.length; | |
| 314 whitespace_rule = whitespace; | |
| 315 max_rule = []; | |
| 316 } | |
| 317 | |
| 318 String _text; | |
| 319 int _end; | |
| 320 | |
| 321 // | |
| 322 bool in_whitespace_mode = false; | |
|
Jennifer Messerly
2011/10/28 01:21:10
style nit: camelCase
sra1
2011/10/28 05:06:25
Done.
| |
| 323 _Rule whitespace_rule = null; | |
| 324 | |
| 325 // Used for constructing an error message. | |
| 326 int inhibitExpectedTrackingDepth = 0; | |
| 327 int max_pos = 0; | |
| 328 var max_rule; | |
| 329 } | |
| 330 | |
| 331 /** | |
| 332 * An interface tag for rules. If this tag is on a rule, then the description() | |
| 333 * of the rule is something sensible to put in a message. | |
| 334 */ | |
| 335 interface _Expectable { | |
| 336 String description(); | |
| 337 } | |
| 338 | |
| 339 class _Rule { | |
| 340 // Returns null for a match failure or [pos, ast] for success. | |
| 341 match(_ParserState state, int pos) { | |
| 342 if (! state.in_whitespace_mode) { | |
| 343 pos = _skip_whitespace(state, pos); | |
| 344 } | |
| 345 return matchAfterWS(state, pos); | |
| 346 } | |
| 347 | |
| 348 // Faster entry point for matching a sub-rule that is matched to the start | |
| 349 // position of the super-rule. Whitespace has already been skipped so no need | |
| 350 // to try to skip it again. | |
| 351 matchAfterWS(_ParserState state, int pos) { | |
| 352 if (state.inhibitExpectedTrackingDepth == 0) { | |
| 353 // Track position for possible error messaging | |
| 354 if (pos > state.max_pos) { | |
| 355 // Store position and the rule. | |
| 356 state.max_pos = pos; | |
| 357 if (this is _Expectable) { | |
| 358 state.max_rule = [this]; | |
| 359 } else { | |
| 360 state.max_rule = []; | |
| 361 } | |
| 362 } else if (pos == state.max_pos) { | |
| 363 if (this is _Expectable) { | |
| 364 state.max_rule.add(this); | |
| 365 } | |
| 366 } | |
| 367 } | |
| 368 // Delegate the matching logic to the the specialized function. | |
| 369 return _match(state, pos); | |
| 370 } | |
| 371 | |
| 372 // Overridden in subclasses to match the rule. | |
| 373 _match(_ParserState state, int pos) => null; | |
| 374 | |
| 375 // Does the rule generate a value (AST) with the match? | |
| 376 bool get generatesValue() => false; | |
| 377 | |
| 378 get defaultValue() => null; | |
| 379 } | |
| 380 | |
| 381 int _skip_whitespace(state, pos) { | |
| 382 // Returns the next non-whitespace position. | |
| 383 // This is done by matching the optional whitespace_rule with the current text . | |
| 384 if (state.whitespace_rule == null) | |
| 385 return pos; | |
| 386 state.in_whitespace_mode = true; | |
| 387 state.inhibitExpectedTrackingDepth++; | |
| 388 while (true) { | |
| 389 var match = state.whitespace_rule.match(state, pos); | |
| 390 if (match == null) | |
| 391 break; | |
| 392 pos = match[0]; | |
| 393 } | |
| 394 state.in_whitespace_mode = false; | |
| 395 state.inhibitExpectedTrackingDepth--; | |
| 396 return pos; | |
| 397 } | |
| 398 | |
| 399 | |
| 400 _Rule _compile_optional(rule) { | |
|
Jennifer Messerly
2011/10/28 01:21:10
why not put this check in _compile?
style nit: nam
sra1
2011/10/28 05:06:25
It gives the user better errors if null throws at
| |
| 401 return rule == null ? null : _compile(rule); | |
| 402 } | |
| 403 | |
| 404 _Rule _compile(rule) { | |
|
Jennifer Messerly
2011/10/28 01:21:10
I found the name "compile" a bit confusing here. N
sra1
2011/10/28 05:06:25
Me neither.
| |
| 405 if (rule is _Rule) | |
| 406 return rule; | |
| 407 if (rule is String) | |
| 408 return new _StringRule(rule); | |
| 409 if (rule is Symbol) | |
| 410 return new _SymbolRule(rule); | |
| 411 if (rule is RegExp) | |
| 412 return new _RegExpRule(rule); | |
| 413 if (rule is List) { | |
| 414 return _compileMultiRule( | |
| 415 rule, true, | |
| 416 (compiledRules, valueCount, reducer) => | |
| 417 new _SequenceRule(compiledRules, valueCount, reducer)); | |
| 418 } | |
| 419 throw new Exception('Cannot compile rule: $rule'); | |
| 420 } | |
| 421 | |
| 422 class _EndOfInputRule extends _Rule { | |
| 423 _match(_ParserState state, int pos) { | |
| 424 if (pos == state._end) | |
| 425 return [pos, null]; | |
| 426 return null; | |
| 427 } | |
| 428 | |
| 429 toString() => 'END'; | |
| 430 } | |
| 431 | |
| 432 class _ErrorRule extends _Rule { | |
| 433 String message; | |
| 434 _ErrorRule(String this.message); | |
| 435 _match(_ParserState state, int pos) { | |
| 436 throw new ParseError(message); | |
|
Jennifer Messerly
2011/10/28 01:21:10
include position?
sra1
2011/10/28 05:06:25
The position needs to be converted into something
| |
| 437 } | |
| 438 | |
| 439 toString() => 'ERROR($message)'; | |
| 440 } | |
| 441 | |
| 442 class _CharCodeRule extends _Rule { | |
| 443 Function _predicate; | |
| 444 var _name; | |
| 445 _CharCodeRule(Function this._predicate, this._name); | |
|
Jennifer Messerly
2011/10/28 01:21:10
don't need to repeat the type "Function" here.
| |
| 446 _match(_ParserState state, int pos) { | |
| 447 if (pos == state._end) | |
|
Jennifer Messerly
2011/10/28 01:21:10
I wonder if there's a way to do this without check
sra1
2011/10/28 05:06:25
I din't think there is. Every access to the text
| |
| 448 return null; | |
| 449 int code = state._text.charCodeAt(pos); | |
| 450 if (_predicate(code)) | |
| 451 return [pos + 1, null]; | |
| 452 return null; | |
| 453 } | |
| 454 | |
| 455 toString() => _name == null ? 'CHARCODE($_predicate)' : 'CHARCODE($_name)'; | |
| 456 } | |
| 457 | |
| 458 class _AnyCharRule extends _Rule { | |
| 459 _match(_ParserState state, int pos) { | |
| 460 if (pos == state._end) | |
| 461 return null; | |
| 462 return [pos + 1, null]; | |
| 463 } | |
| 464 | |
| 465 toString() => 'CHAR()'; | |
| 466 } | |
| 467 | |
| 468 class _SymbolRule extends _Rule { | |
| 469 final Symbol _symbol; | |
| 470 _SymbolRule(Symbol this._symbol); | |
| 471 _match(_ParserState state, int pos) { | |
| 472 if (_symbol._rule == null) | |
| 473 throw new Exception("Symbol '${_symbol.name}' is undefined"); | |
| 474 return _symbol._rule.match(state, pos); | |
| 475 } | |
| 476 | |
| 477 bool get generatesValue() => true; | |
| 478 | |
| 479 toString() => '<${_symbol.name}>'; | |
| 480 } | |
| 481 | |
| 482 class _SkipRule extends _Rule { | |
| 483 // A rule that has no value. | |
| 484 _Rule _rule; | |
| 485 _SkipRule(_Rule this._rule); | |
| 486 _match(_ParserState state, int pos) { | |
| 487 var match = _rule.matchAfterWS(state, pos); | |
| 488 if (match == null) | |
| 489 return null; | |
| 490 return [match[0], null]; | |
| 491 } | |
| 492 | |
| 493 toString() => 'TOKEN($_rule)'; | |
| 494 } | |
| 495 | |
| 496 class _StringRule extends _Rule implements _Expectable { | |
| 497 final String _string; | |
| 498 int _len; | |
| 499 _StringRule(this._string) { | |
| 500 _len = _string.length; | |
| 501 } | |
| 502 | |
| 503 _match(_ParserState state, int pos) { | |
| 504 if (pos + _len > state._end) | |
|
Jennifer Messerly
2011/10/28 01:21:10
should be >=, to match a string at the end ?
sra1
2011/10/28 05:06:25
It is correct. Normally you check pos < end. But
| |
| 505 return null; | |
| 506 for (int i = 0; i < _len; i++) { | |
| 507 if (state._text.charCodeAt(pos + i) != _string.charCodeAt(i)) | |
| 508 return null; | |
| 509 } | |
| 510 return [pos + _len, null]; | |
| 511 } | |
| 512 | |
| 513 //get defaultValue() => _string; | |
| 514 | |
| 515 toString() => '"$_string"'; | |
| 516 | |
| 517 description() => "'$_string'"; | |
| 518 } | |
| 519 | |
| 520 class _RegExpRule extends _Rule { | |
| 521 RegExp _re; | |
| 522 _RegExpRule(this._re) { | |
| 523 // There is no convenient way to match an anchored substring. | |
| 524 throw new Exception('RegExp matching not supported'); | |
| 525 } | |
| 526 | |
| 527 toString() => '"$_re"'; | |
| 528 } | |
| 529 | |
| 530 class _LexicalRule extends _Rule implements _Expectable { | |
| 531 final String _name; | |
| 532 final _Rule _rule; | |
| 533 | |
| 534 _LexicalRule(String this._name, _Rule this._rule); | |
| 535 | |
| 536 _match(_ParserState state, int pos) { | |
| 537 state.in_whitespace_mode = true; | |
| 538 state.inhibitExpectedTrackingDepth++; | |
| 539 var match = _rule.matchAfterWS(state, pos); | |
| 540 state.inhibitExpectedTrackingDepth--; | |
| 541 state.in_whitespace_mode = false; | |
| 542 return match; | |
| 543 } | |
| 544 | |
| 545 toString() => _name; | |
| 546 | |
| 547 description() => _name == null ? '?' : _name; | |
| 548 } | |
| 549 | |
| 550 class _TextValueRule extends _Rule { | |
| 551 final _Rule _rule; | |
| 552 final _extract; // Function | |
| 553 | |
| 554 _TextValueRule(_Rule this._rule, Function this._extract); | |
| 555 | |
| 556 _match(_ParserState state, int pos) { | |
| 557 var match = _rule.matchAfterWS(state, pos); | |
| 558 if (match == null) { | |
| 559 return null; | |
| 560 } | |
| 561 var endPos = match[0]; | |
| 562 return [endPos, _extract(state._text, pos, endPos)]; | |
| 563 } | |
| 564 | |
| 565 bool get generatesValue() => true; | |
| 566 | |
| 567 toString() => 'TEXT($_rule)'; | |
| 568 } | |
| 569 | |
| 570 _Rule _compileMultiRule(List rules, | |
| 571 bool allowReducer, | |
| 572 finish(compiledRules, valueCount, reducer)) { | |
| 573 int valueCount = 0; | |
| 574 List compiledRules = new List<_Rule>(); | |
| 575 Function reducer; | |
| 576 for (var rule in rules) { | |
| 577 if (reducer != null) | |
| 578 throw new Exception('Reducer must be last in sequence: $rule'); | |
| 579 if (rule is Function) { | |
| 580 if (allowReducer) | |
| 581 reducer = rule; | |
| 582 else | |
| 583 throw new Exception('Bad rule: "$rule"'); | |
| 584 } else { | |
| 585 _Rule compiledRule = _compile(rule); | |
| 586 if (compiledRule.generatesValue) | |
| 587 ++valueCount; | |
| 588 compiledRules.add(compiledRule); | |
| 589 } | |
| 590 } | |
| 591 return finish(compiledRules, valueCount, reducer); | |
| 592 } | |
| 593 | |
| 594 String _formatMultiRule(String functor, List rules) { | |
| 595 var sb = new StringBuffer(functor); | |
| 596 sb.add('('); | |
| 597 var separator = ''; | |
| 598 for (var rule in rules) { | |
| 599 sb.add(separator); | |
| 600 sb.add(rule); | |
| 601 separator = ','; | |
| 602 } | |
| 603 sb.add(')'); | |
| 604 return sb.toString(); | |
| 605 } | |
| 606 | |
| 607 class _SequenceRule extends _Rule { | |
| 608 // This rule matches the component rules in order. | |
| 609 final List<_Rule> _rules; | |
| 610 final int _generatingSubRules = 0; | |
| 611 final Function _reducer; | |
| 612 bool _generatesValue; | |
| 613 _SequenceRule(List<_Rule> this._rules, | |
| 614 int this._generatingSubRules, | |
| 615 Function this._reducer) { | |
| 616 _generatesValue = _generatingSubRules > 0 || _reducer != null; | |
| 617 } | |
| 618 | |
| 619 _match(state, pos) { | |
| 620 var sequence = []; | |
| 621 for (var rule in _rules) { | |
| 622 var match = rule.match(state, pos); | |
| 623 if (match == null) | |
| 624 return null; | |
| 625 if (rule.generatesValue) { | |
| 626 var ast = match[1]; | |
| 627 sequence.add(ast); | |
| 628 } | |
| 629 pos = match[0]; | |
| 630 } | |
| 631 if (_reducer == null) { | |
| 632 if (_generatingSubRules == 0) | |
| 633 return [pos, null]; | |
| 634 if (_generatingSubRules == 1) | |
| 635 return [pos, sequence[0]]; | |
| 636 return [pos, sequence]; | |
| 637 } else { | |
| 638 return [pos, _apply(_reducer, sequence)]; | |
| 639 } | |
| 640 } | |
| 641 | |
| 642 bool get generatesValue() => _generatesValue; | |
| 643 | |
| 644 toString() => _formatMultiRule('SEQ', _rules); | |
| 645 } | |
| 646 | |
| 647 class _ChoiceRule extends _Rule { | |
| 648 // This rule matches the first component rule that matches. | |
| 649 List<_Rule> _rules; | |
| 650 _ChoiceRule(List<_Rule> this._rules); | |
| 651 | |
| 652 _match(state, pos) { | |
| 653 for (var rule in _rules) { | |
| 654 var match = rule.match(state, pos); | |
| 655 if (match != null) { | |
| 656 /* | |
| 657 if (!rule.generatesValue) { | |
| 658 var value = rule.defaultValue; | |
| 659 if (value != null) | |
| 660 return [match[0], value]; | |
| 661 } | |
| 662 */ | |
| 663 return match; | |
| 664 } | |
| 665 } | |
| 666 return null; | |
| 667 } | |
| 668 | |
| 669 bool get generatesValue() => true; | |
| 670 | |
| 671 toString() => _formatMultiRule('OR', _rules); | |
| 672 } | |
| 673 | |
| 674 class _OptionalRule extends _Rule { | |
| 675 _Rule _rule; | |
| 676 _OptionalRule(_Rule this._rule); | |
| 677 _match(_ParserState state, int pos) { | |
| 678 var match = _rule.match(state, pos); | |
| 679 if (_rule.generatesValue) | |
| 680 if (match == null) | |
|
Jennifer Messerly
2011/10/28 01:21:10
I often write this as:
return (match == null) ? [p
sra1
2011/10/28 05:06:25
I'm not a fan of ?:
I prefer if-then-else as an ex
| |
| 681 return [pos, null]; | |
| 682 else | |
| 683 return match; | |
| 684 if (match == null) | |
| 685 return [pos, false]; | |
| 686 else | |
| 687 return [match[0], true]; | |
| 688 } | |
| 689 | |
| 690 bool get generatesValue() => true; | |
| 691 | |
| 692 toString() => 'MAYBE($_rule)'; | |
| 693 } | |
| 694 | |
| 695 class _ContextRule extends _Rule { | |
| 696 _Rule _rule; | |
| 697 _ContextRule(_Rule this._rule); | |
| 698 _match(_ParserState state, int pos) { | |
| 699 // TODO: protect error state. | |
| 700 var match = _rule._match(state, pos); | |
| 701 if (match == null) | |
| 702 return null; | |
| 703 return [pos, null]; | |
| 704 } | |
| 705 | |
| 706 toString() => 'AT($_rule)'; | |
| 707 } | |
| 708 | |
| 709 class _NegativeContextRule extends _Rule { | |
| 710 _Rule _rule; | |
| 711 _NegativeContextRule(_Rule this._rule); | |
| 712 _match(_ParserState state, int pos) { | |
| 713 // TODO: protect error state. | |
| 714 var match = _rule._match(state, pos); | |
| 715 if (match == null) | |
| 716 return [pos, null]; | |
| 717 return null; | |
| 718 } | |
| 719 | |
| 720 toString() => 'NOT($_rule)'; | |
| 721 } | |
| 722 | |
| 723 class _RepeatRule extends _Rule { | |
| 724 // Matches zero, one or more items. | |
| 725 _Rule _rule; | |
| 726 _Rule _separator; | |
| 727 int _min; | |
| 728 | |
| 729 _RepeatRule(this._rule, this._separator, this._min); | |
| 730 | |
| 731 _match(state, pos) { | |
| 732 // First match. | |
| 733 var match = _rule.match(state, pos); | |
| 734 if (match == null) | |
| 735 if (_min == 0) | |
| 736 return [pos, []]; | |
| 737 else | |
| 738 return null; | |
| 739 pos = match[0]; | |
| 740 var result = [match[1]]; | |
| 741 | |
| 742 // Subsequent matches: | |
| 743 while (true) { | |
| 744 var newPos = pos; | |
| 745 if (_separator != null) { | |
| 746 match = _separator.match(state, pos); | |
| 747 if (match == null) | |
| 748 return [pos, result]; | |
| 749 newPos = match[0]; | |
| 750 } | |
| 751 match = _rule.match(state, newPos); | |
| 752 if (match == null) | |
| 753 return [pos, result]; | |
| 754 pos = match[0]; | |
| 755 result.add(match[1]); | |
| 756 } | |
| 757 } | |
| 758 | |
| 759 bool get generatesValue() => true; | |
| 760 | |
| 761 toString() => 'MANY(min:$_min, $_rule${_separator==null?'':", sep: $_separator "})'; | |
| 762 } | |
| 763 | |
| 764 class _MemoRule extends _Rule { | |
| 765 final _Rule _rule; | |
| 766 | |
| 767 var parseInstance; | |
| 768 | |
| 769 // A map from position to result. Can this be replaced with something | |
| 770 // smaller? | |
| 771 // TODO: figure out how to discard the map and parseInstance after parsing. | |
|
Jennifer Messerly
2011/10/28 01:21:10
Well, normally you'd have the memoization be per-r
| |
| 772 Map<int,Object> map; | |
| 773 | |
| 774 _MemoRule(this._rule); | |
| 775 | |
| 776 _match(state, pos) { | |
| 777 // See if we are still parsing the same input. Relies on the fact that the | |
| 778 // input is a string ans string are immutable. | |
| 779 if (parseInstance !== state._text) { | |
| 780 map = new Map<int,Object>(); | |
| 781 parseInstance = state._text; | |
| 782 } | |
| 783 // TODO: does this have to check or preserve parse state (like | |
| 784 // in_whitespace_mode, error position info etc?) | |
| 785 if (map.containsKey(pos)) { | |
| 786 //print('Found $pos $_rule => ${map[pos][0]}'); | |
| 787 return map[pos]; | |
|
Jennifer Messerly
2011/10/28 01:21:10
btw: instead of doing two lookups via "map.contain
sra1
2011/10/28 05:06:25
But null can be stored in the map!
| |
| 788 } | |
| 789 var match = _rule.match(state, pos); | |
| 790 map[pos] = match; | |
| 791 return match; | |
| 792 } | |
| 793 | |
| 794 bool get generatesValue() => _rule.generatesValue; | |
| 795 | |
| 796 toString() => 'MEMO($_rule)'; | |
| 797 } | |
| 798 | |
| 799 _apply(fn, List args) { | |
| 800 switch (args.length) { | |
| 801 case 0: return fn(); | |
| 802 case 1: return fn(args[0]); | |
| 803 case 2: return fn(args[0], args[1]); | |
| 804 case 3: return fn(args[0], args[1], args[2]); | |
| 805 case 4: return fn(args[0], args[1], args[2], args[3]); | |
| 806 case 5: return fn(args[0], args[1], args[2], args[3], args[4]); | |
| 807 case 6: return fn(args[0], args[1], args[2], args[3], args[4], | |
| 808 args[5]); | |
| 809 case 7: return fn(args[0], args[1], args[2], args[3], args[4], | |
| 810 args[5], args[6]); | |
| 811 case 8: return fn(args[0], args[1], args[2], args[3], args[4], | |
| 812 args[5], args[6], args[7]); | |
| 813 case 9: return fn(args[0], args[1], args[2], args[3], args[4], | |
| 814 args[5], args[6], args[7], args[8]); | |
| 815 case 10: return fn(args[0], args[1], args[2], args[3], args[4], | |
| 816 args[5], args[6], args[7], args[8], args[9]); | |
| 817 | |
| 818 default: | |
| 819 throw new Exception('Too many arguments in _apply: $args'); | |
| 820 } | |
| 821 } | |
| 822 | |
| 823 List _unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z) { | |
| 824 List list = new List(); | |
| 825 add(element) { if (element != null) list.add(element); } | |
| 826 add(a); | |
| 827 add(b); | |
| 828 add(c); | |
| 829 add(d); | |
| 830 add(e); | |
| 831 add(f); | |
| 832 add(g); | |
| 833 add(h); | |
| 834 add(i); | |
| 835 add(j); | |
| 836 add(k); | |
| 837 add(l); | |
| 838 add(m); | |
| 839 add(n); | |
| 840 add(o); | |
| 841 add(p); | |
| 842 add(q); | |
| 843 add(r); | |
| 844 add(s); | |
| 845 add(t); | |
| 846 add(u); | |
| 847 add(v); | |
| 848 add(w); | |
| 849 add(x); | |
| 850 add(y); | |
| 851 add(z); | |
| 852 return list; | |
| 853 } | |
| OLD | NEW |