| OLD | NEW |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 library PegParser; | 5 library PegParser; |
| 6 | 6 |
| 7 /* | 7 /* |
| 8 * The following functions are combinators for building Rules. | 8 * The following functions are combinators for building Rules. |
| 9 * | 9 * |
| 10 * A rule is one of the following | 10 * A rule is one of the following |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 48 else | 48 else |
| 49 return new _CharCodeRule(spec, name); | 49 return new _CharCodeRule(spec, name); |
| 50 } | 50 } |
| 51 | 51 |
| 52 /** | 52 /** |
| 53 * Matches one of the [characters]. | 53 * Matches one of the [characters]. |
| 54 * | 54 * |
| 55 * CHAR does not generate a value. | 55 * CHAR does not generate a value. |
| 56 */ | 56 */ |
| 57 _Rule CHAR([characters]) { | 57 _Rule CHAR([characters]) { |
| 58 if (characters == null) | 58 if (characters == null) return const _AnyCharRule(); |
| 59 return const _AnyCharRule(); | 59 if (characters is int) return CHARCODE(characters); |
| 60 if (characters is int) | |
| 61 return CHARCODE(characters); | |
| 62 | 60 |
| 63 // Find the range of character codes and construct an array of flags for codes | 61 // Find the range of character codes and construct an array of flags for codes |
| 64 // within the range. | 62 // within the range. |
| 65 List<int> codes = characters.codeUnits.toList(); | 63 List<int> codes = characters.codeUnits.toList(); |
| 66 codes.sort((a, b) => a < b ? -1 : a > b ? 1 : 0); | 64 codes.sort((a, b) => a < b ? -1 : a > b ? 1 : 0); |
| 67 int lo = codes[0]; | 65 int lo = codes[0]; |
| 68 int hi = codes[codes.length - 1]; | 66 int hi = codes[codes.length - 1]; |
| 69 if (lo == hi) | 67 if (lo == hi) return CHARCODE(lo); |
| 70 return CHARCODE(lo); | |
| 71 int len = hi - lo + 1; | 68 int len = hi - lo + 1; |
| 72 var flags = new List<bool>(len); | 69 var flags = new List<bool>(len); |
| 73 for (int i = 0; i < len; ++i) | 70 for (int i = 0; i < len; ++i) flags[i] = false; |
| 74 flags[i] = false; | 71 for (int code in codes) flags[code - lo] = true; |
| 75 for (int code in codes) | |
| 76 flags[code - lo] = true; | |
| 77 | 72 |
| 78 return CHARCODE((code) => code >= lo && code <= hi && flags[code - lo]); | 73 return CHARCODE((code) => code >= lo && code <= hi && flags[code - lo]); |
| 79 } | 74 } |
| 80 | 75 |
| 81 /** | 76 /** |
| 82 * Matches the end of the input. | 77 * Matches the end of the input. |
| 83 * | 78 * |
| 84 * END does not generate a value. | 79 * END does not generate a value. |
| 85 */ | 80 */ |
| 86 _Rule get END => new _EndOfInputRule(); | 81 _Rule get END => new _EndOfInputRule(); |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 130 return new _LexicalRule(arg1, _compile(arg2)); | 125 return new _LexicalRule(arg1, _compile(arg2)); |
| 131 } | 126 } |
| 132 | 127 |
| 133 /** | 128 /** |
| 134 * Matches [rule] and generates a value from the matched text. If the [rule] | 129 * Matches [rule] and generates a value from the matched text. If the [rule] |
| 135 * matches, then TEXT(rule) matches and has a value derived from the string | 130 * matches, then TEXT(rule) matches and has a value derived from the string |
| 136 * fragment that was matched. The default derived value is the string fragment. | 131 * fragment that was matched. The default derived value is the string fragment. |
| 137 * | 132 * |
| 138 * TEXT always generates a value. | 133 * TEXT always generates a value. |
| 139 */ | 134 */ |
| 140 _Rule TEXT(rule, [extractor]) => | 135 _Rule TEXT(rule, [extractor]) => new _TextValueRule( |
| 141 new _TextValueRule(_compile(rule), | 136 _compile(rule), |
| 142 extractor == null | 137 extractor == null |
| 143 ? (string, start, end) => string.substring(start, end) | 138 ? (string, start, end) => string.substring(start, end) |
| 144 : extractor); | 139 : extractor); |
| 145 | 140 |
| 146 /** | 141 /** |
| 147 * Matches an optional rule. | 142 * Matches an optional rule. |
| 148 * | 143 * |
| 149 * MAYBE is a value generating matcher. | 144 * MAYBE is a value generating matcher. |
| 150 * | 145 * |
| 151 * If [rule] is value generating then the value is the value generated by [rule] | 146 * If [rule] is value generating then the value is the value generated by [rule] |
| 152 * if it matches, and [:null:] if it does not. | 147 * if it matches, and [:null:] if it does not. |
| 153 * | 148 * |
| 154 * If [rule] is not value generatinge then the value is [:true:] if [rule] | 149 * If [rule] is not value generatinge then the value is [:true:] if [rule] |
| (...skipping 23 matching lines...) Expand all Loading... |
| 178 */ | 173 */ |
| 179 _Rule MANY0(rule, [separator = null]) { | 174 _Rule MANY0(rule, [separator = null]) { |
| 180 return new _RepeatRule(_compile(rule), _compileOptional(separator), 0); | 175 return new _RepeatRule(_compile(rule), _compileOptional(separator), 0); |
| 181 } | 176 } |
| 182 | 177 |
| 183 /** | 178 /** |
| 184 * Matches [rules] in order until one succeeds. | 179 * Matches [rules] in order until one succeeds. |
| 185 * | 180 * |
| 186 * OR is value-generating. | 181 * OR is value-generating. |
| 187 */ | 182 */ |
| 188 _Rule OR([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]) => | 183 _Rule OR( |
| 184 [a, |
| 185 b, |
| 186 c, |
| 187 d, |
| 188 e, |
| 189 f, |
| 190 g, |
| 191 h, |
| 192 i, |
| 193 j, |
| 194 k, |
| 195 l, |
| 196 m, |
| 197 n, |
| 198 o, |
| 199 p, |
| 200 q, |
| 201 r, |
| 202 s, |
| 203 t, |
| 204 u, |
| 205 v, |
| 206 w, |
| 207 x, |
| 208 y, |
| 209 z]) => |
| 189 _compileMultiRule( | 210 _compileMultiRule( |
| 190 (a is List && b == null) // Backward compat. OR([a, b]) => OR(a, b). | 211 (a is List && b == null) // Backward compat. OR([a, b]) => OR(a, b). |
| 191 ? a | 212 ? a |
| 192 : _unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z), | 213 : _unspread(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, |
| 214 t, u, v, w, x, y, z), |
| 193 false, | 215 false, |
| 194 (compiledRules, valueCount, reducer) => | 216 (compiledRules, valueCount, reducer) => new _ChoiceRule(compiledRules)); |
| 195 new _ChoiceRule(compiledRules)); | |
| 196 | 217 |
| 197 | 218 _Rule SEQ( |
| 198 | 219 [a, |
| 199 _Rule SEQ([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]) => | 220 b, |
| 200 _compile(_unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z)); | 221 c, |
| 222 d, |
| 223 e, |
| 224 f, |
| 225 g, |
| 226 h, |
| 227 i, |
| 228 j, |
| 229 k, |
| 230 l, |
| 231 m, |
| 232 n, |
| 233 o, |
| 234 p, |
| 235 q, |
| 236 r, |
| 237 s, |
| 238 t, |
| 239 u, |
| 240 v, |
| 241 w, |
| 242 x, |
| 243 y, |
| 244 z]) => |
| 245 _compile(_unspread(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, |
| 246 t, u, v, w, x, y, z)); |
| 201 | 247 |
| 202 /** | 248 /** |
| 203 * Matches [rule] | 249 * Matches [rule] |
| 204 */ | 250 */ |
| 205 _Rule MEMO(rule) => new _MemoRule(_compile(rule)); | 251 _Rule MEMO(rule) => new _MemoRule(_compile(rule)); |
| 206 | 252 |
| 207 _Rule TAG(tag, rule) => _compile([rule, (ast) => [tag, ast]]); | 253 _Rule TAG(tag, rule) => _compile([ |
| 208 | 254 rule, |
| 255 (ast) => [tag, ast] |
| 256 ]); |
| 209 | 257 |
| 210 class ParseError implements Exception { | 258 class ParseError implements Exception { |
| 211 const ParseError(String this._message); | 259 const ParseError(String this._message); |
| 212 String toString() => _message; | 260 String toString() => _message; |
| 213 final String _message; | 261 final String _message; |
| 214 } | 262 } |
| 215 | 263 |
| 216 /** | 264 /** |
| 217 * A grammar is a collection of symbols and rules that may be used to parse an | 265 * A grammar is a collection of symbols and rules that may be used to parse an |
| 218 * input. | 266 * input. |
| 219 */ | 267 */ |
| 220 class Grammar { | 268 class Grammar { |
| 221 Map<String, Symbol> _symbols; | 269 Map<String, Symbol> _symbols; |
| 222 | 270 |
| 223 /** This rule may be set by the user to define whitespace. */ | 271 /** This rule may be set by the user to define whitespace. */ |
| 224 _Rule _whitespace; | 272 _Rule _whitespace; |
| 225 | 273 |
| 226 _Rule get whitespace => _whitespace; | 274 _Rule get whitespace => _whitespace; |
| 227 void set whitespace(rule) { _whitespace = _compile(rule); } | 275 void set whitespace(rule) { |
| 276 _whitespace = _compile(rule); |
| 277 } |
| 228 | 278 |
| 229 Grammar() { | 279 Grammar() { |
| 230 _symbols = new Map<String, Symbol>(); | 280 _symbols = new Map<String, Symbol>(); |
| 231 whitespace = CHAR(' \t\r\n'); | 281 whitespace = CHAR(' \t\r\n'); |
| 232 } | 282 } |
| 233 | 283 |
| 234 /** | 284 /** |
| 235 * operator [] is used to find or create symbols. Symbols may appear in rules | 285 * operator [] is used to find or create symbols. Symbols may appear in rules |
| 236 * to define recursive rules. | 286 * to define recursive rules. |
| 237 */ | 287 */ |
| 238 Symbol operator [](String name) { | 288 Symbol operator [](String name) { |
| 239 if (_symbols.containsKey(name)) | 289 if (_symbols.containsKey(name)) return _symbols[name]; |
| 240 return _symbols[name]; | |
| 241 Symbol s = new Symbol(name, this); | 290 Symbol s = new Symbol(name, this); |
| 242 _symbols[name] = s; | 291 _symbols[name] = s; |
| 243 return s; | 292 return s; |
| 244 } | 293 } |
| 245 | 294 |
| 246 /** | 295 /** |
| 247 * Parses the input string and returns the parsed AST, or throws an exception | 296 * Parses the input string and returns the parsed AST, or throws an exception |
| 248 * if the input can't be parsed. | 297 * if the input can't be parsed. |
| 249 */ | 298 */ |
| 250 parse(root, String text) { | 299 parse(root, String text) { |
| 251 for (var symbol in _symbols.values) | 300 for (var symbol in _symbols.values) |
| 252 if (symbol._rule == null) | 301 if (symbol._rule == null) print('${symbol.name} is undefined'); |
| 253 print('${symbol.name} is undefined'); | |
| 254 | 302 |
| 255 var state = new _ParserState(text, whitespace: whitespace); | 303 var state = new _ParserState(text, whitespace: whitespace); |
| 256 var match = _compile(root).match(state, 0); | 304 var match = _compile(root).match(state, 0); |
| 257 if (match == null) | 305 if (match == null) return diagnose(state); |
| 258 return diagnose(state); | |
| 259 var pos = match[0]; | 306 var pos = match[0]; |
| 260 pos = _skip_whitespace(state, pos); | 307 pos = _skip_whitespace(state, pos); |
| 261 if (pos == state._end) | 308 if (pos == state._end) return match[1]; |
| 262 return match[1]; | |
| 263 // TODO: Make this complain about expecting end of file. | 309 // TODO: Make this complain about expecting end of file. |
| 264 return diagnose(state); | 310 return diagnose(state); |
| 265 } | 311 } |
| 266 | 312 |
| 267 diagnose(state) { | 313 diagnose(state) { |
| 268 var message = 'unexpected error'; | 314 var message = 'unexpected error'; |
| 269 if (!state.max_rule.isEmpty) { | 315 if (!state.max_rule.isEmpty) { |
| 270 var s = new Set(); | 316 var s = new Set(); |
| 271 for (var rule in state.max_rule) | 317 for (var rule in state.max_rule) s.add(rule.description()); |
| 272 s.add(rule.description()); | |
| 273 var tokens = new List<String>.from(s); | 318 var tokens = new List<String>.from(s); |
| 274 tokens.sort((a, b) => | 319 tokens.sort((a, b) => a.startsWith("'") == b.startsWith("'") |
| 275 a.startsWith("'") == b.startsWith("'") | 320 ? a.compareTo(b) |
| 276 ? a.compareTo(b) | 321 : a.startsWith("'") ? 1 : -1); |
| 277 : a.startsWith("'") ? 1 : -1); | |
| 278 var expected = tokens.join(' or '); | 322 var expected = tokens.join(' or '); |
| 279 var found = state.max_pos == state._end ? 'end of file' | 323 var found = state.max_pos == state._end |
| 324 ? 'end of file' |
| 280 : "'${state._text[state.max_pos]}'"; | 325 : "'${state._text[state.max_pos]}'"; |
| 281 message = 'Expected $expected but found $found'; | 326 message = 'Expected $expected but found $found'; |
| 282 } | 327 } |
| 283 int start = state.max_pos; | 328 int start = state.max_pos; |
| 284 int end = start; | 329 int end = start; |
| 285 while (start >= 1 && state._text[start - 1] != '\n') --start; | 330 while (start >= 1 && state._text[start - 1] != '\n') --start; |
| 286 while (end < state._text.length && state._text[end] != '\n') ++end; | 331 while (end < state._text.length && state._text[end] != '\n') ++end; |
| 287 var line = state._text.substring(start, end); | 332 var line = state._text.substring(start, end); |
| 288 var indicator = ''; | 333 var indicator = ''; |
| 289 for (var i = 0; i < line.length && start + i < state.max_pos; i++) | 334 for (var i = 0; i < line.length && start + i < state.max_pos; i++) |
| 290 indicator = ' $indicator'; | 335 indicator = ' $indicator'; |
| 291 indicator = '$indicator^'; | 336 indicator = '$indicator^'; |
| 292 // TODO: Convert to an exception. | 337 // TODO: Convert to an exception. |
| 293 print(message); | 338 print(message); |
| 294 print(line); | 339 print(line); |
| 295 print(indicator); | 340 print(indicator); |
| 296 return null; | 341 return null; |
| 297 } | 342 } |
| 298 } | 343 } |
| 299 | 344 |
| 300 class Symbol { | 345 class Symbol { |
| 301 final String name; | 346 final String name; |
| 302 final Grammar grammar; | 347 final Grammar grammar; |
| 303 _Rule _rule; | 348 _Rule _rule; |
| 304 | 349 |
| 305 Symbol(this.name, this.grammar); | 350 Symbol(this.name, this.grammar); |
| 306 | 351 |
| 307 void set def(rule) { | 352 void set def(rule) { |
| 308 assert(_rule == null); // Assign once. | 353 assert(_rule == null); // Assign once. |
| 309 _rule = _compile(rule); | 354 _rule = _compile(rule); |
| 310 } | 355 } |
| 311 | 356 |
| 312 toString() => _rule == null ? '<$name>' : '<$name = $_rule>'; | 357 toString() => _rule == null ? '<$name>' : '<$name = $_rule>'; |
| 313 } | 358 } |
| 314 | 359 |
| 315 | |
| 316 class _ParserState { | 360 class _ParserState { |
| 317 _ParserState(this._text, {_Rule whitespace}) { | 361 _ParserState(this._text, {_Rule whitespace}) { |
| 318 _end = this._text.length; | 362 _end = this._text.length; |
| 319 whitespaceRule = whitespace; | 363 whitespaceRule = whitespace; |
| 320 max_rule = []; | 364 max_rule = []; |
| 321 } | 365 } |
| 322 | 366 |
| 323 String _text; | 367 String _text; |
| 324 int _end; | 368 int _end; |
| 325 | 369 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 338 * of the rule is something sensible to put in a message. | 382 * of the rule is something sensible to put in a message. |
| 339 */ | 383 */ |
| 340 abstract class _Expectable { | 384 abstract class _Expectable { |
| 341 String description(); | 385 String description(); |
| 342 } | 386 } |
| 343 | 387 |
| 344 class _Rule { | 388 class _Rule { |
| 345 const _Rule(); | 389 const _Rule(); |
| 346 // Returns null for a match failure or [pos, ast] for success. | 390 // Returns null for a match failure or [pos, ast] for success. |
| 347 match(_ParserState state, int pos) { | 391 match(_ParserState state, int pos) { |
| 348 if (! state.inWhitespaceMode) { | 392 if (!state.inWhitespaceMode) { |
| 349 pos = _skip_whitespace(state, pos); | 393 pos = _skip_whitespace(state, pos); |
| 350 } | 394 } |
| 351 return matchAfterWS(state, pos); | 395 return matchAfterWS(state, pos); |
| 352 } | 396 } |
| 353 | 397 |
| 354 // Faster entry point for matching a sub-rule that is matched to the start | 398 // Faster entry point for matching a sub-rule that is matched to the start |
| 355 // position of the super-rule. Whitespace has already been skipped so no need | 399 // position of the super-rule. Whitespace has already been skipped so no need |
| 356 // to try to skip it again. | 400 // to try to skip it again. |
| 357 matchAfterWS(_ParserState state, int pos) { | 401 matchAfterWS(_ParserState state, int pos) { |
| 358 if (state.inhibitExpectedTrackingDepth == 0) { | 402 if (state.inhibitExpectedTrackingDepth == 0) { |
| (...skipping 21 matching lines...) Expand all Loading... |
| 380 | 424 |
| 381 // Does the rule generate a value (AST) with the match? | 425 // Does the rule generate a value (AST) with the match? |
| 382 bool get generatesValue => false; | 426 bool get generatesValue => false; |
| 383 | 427 |
| 384 get defaultValue => null; | 428 get defaultValue => null; |
| 385 } | 429 } |
| 386 | 430 |
| 387 int _skip_whitespace(state, pos) { | 431 int _skip_whitespace(state, pos) { |
| 388 // Returns the next non-whitespace position. | 432 // Returns the next non-whitespace position. |
| 389 // This is done by matching the optional whitespaceRule with the current text. | 433 // This is done by matching the optional whitespaceRule with the current text. |
| 390 if (state.whitespaceRule == null) | 434 if (state.whitespaceRule == null) return pos; |
| 391 return pos; | |
| 392 state.inWhitespaceMode = true; | 435 state.inWhitespaceMode = true; |
| 393 state.inhibitExpectedTrackingDepth++; | 436 state.inhibitExpectedTrackingDepth++; |
| 394 while (true) { | 437 while (true) { |
| 395 var match = state.whitespaceRule.match(state, pos); | 438 var match = state.whitespaceRule.match(state, pos); |
| 396 if (match == null) | 439 if (match == null) break; |
| 397 break; | |
| 398 pos = match[0]; | 440 pos = match[0]; |
| 399 } | 441 } |
| 400 state.inWhitespaceMode = false; | 442 state.inWhitespaceMode = false; |
| 401 state.inhibitExpectedTrackingDepth--; | 443 state.inhibitExpectedTrackingDepth--; |
| 402 return pos; | 444 return pos; |
| 403 } | 445 } |
| 404 | 446 |
| 405 | |
| 406 _Rule _compileOptional(rule) { | 447 _Rule _compileOptional(rule) { |
| 407 return rule == null ? null : _compile(rule); | 448 return rule == null ? null : _compile(rule); |
| 408 } | 449 } |
| 409 | 450 |
| 410 _Rule _compile(rule) { | 451 _Rule _compile(rule) { |
| 411 if (rule is _Rule) | 452 if (rule is _Rule) return rule; |
| 412 return rule; | 453 if (rule is String) return new _StringRule(rule); |
| 413 if (rule is String) | 454 if (rule is Symbol) return new _SymbolRule(rule); |
| 414 return new _StringRule(rule); | 455 if (rule is RegExp) return new _RegExpRule(rule); |
| 415 if (rule is Symbol) | |
| 416 return new _SymbolRule(rule); | |
| 417 if (rule is RegExp) | |
| 418 return new _RegExpRule(rule); | |
| 419 if (rule is List) { | 456 if (rule is List) { |
| 420 return _compileMultiRule( | 457 return _compileMultiRule( |
| 421 rule, true, | 458 rule, |
| 459 true, |
| 422 (compiledRules, valueCount, reducer) => | 460 (compiledRules, valueCount, reducer) => |
| 423 new _SequenceRule(compiledRules, valueCount, reducer)); | 461 new _SequenceRule(compiledRules, valueCount, reducer)); |
| 424 } | 462 } |
| 425 throw new Exception('Cannot compile rule: $rule'); | 463 throw new Exception('Cannot compile rule: $rule'); |
| 426 } | 464 } |
| 427 | 465 |
| 428 class _EndOfInputRule extends _Rule { | 466 class _EndOfInputRule extends _Rule { |
| 429 _match(_ParserState state, int pos) { | 467 _match(_ParserState state, int pos) { |
| 430 if (pos == state._end) | 468 if (pos == state._end) return [pos, null]; |
| 431 return [pos, null]; | |
| 432 return null; | 469 return null; |
| 433 } | 470 } |
| 434 | 471 |
| 435 toString() => 'END'; | 472 toString() => 'END'; |
| 436 } | 473 } |
| 437 | 474 |
| 438 class _ErrorRule extends _Rule { | 475 class _ErrorRule extends _Rule { |
| 439 String message; | 476 String message; |
| 440 _ErrorRule(String this.message); | 477 _ErrorRule(String this.message); |
| 441 _match(_ParserState state, int pos) { | 478 _match(_ParserState state, int pos) { |
| 442 throw new ParseError(message); | 479 throw new ParseError(message); |
| 443 } | 480 } |
| 444 | 481 |
| 445 toString() => 'ERROR($message)'; | 482 toString() => 'ERROR($message)'; |
| 446 } | 483 } |
| 447 | 484 |
| 448 class _CharCodeRule extends _Rule { | 485 class _CharCodeRule extends _Rule { |
| 449 Function _predicate; | 486 Function _predicate; |
| 450 var _name; | 487 var _name; |
| 451 _CharCodeRule(this._predicate, this._name); | 488 _CharCodeRule(this._predicate, this._name); |
| 452 _match(_ParserState state, int pos) { | 489 _match(_ParserState state, int pos) { |
| 453 if (pos == state._end) | 490 if (pos == state._end) return null; |
| 454 return null; | |
| 455 int code = state._text.codeUnitAt(pos); | 491 int code = state._text.codeUnitAt(pos); |
| 456 if (_predicate(code)) | 492 if (_predicate(code)) return [pos + 1, null]; |
| 457 return [pos + 1, null]; | |
| 458 return null; | 493 return null; |
| 459 } | 494 } |
| 460 | 495 |
| 461 toString() => _name == null ? 'CHARCODE($_predicate)' : 'CHARCODE($_name)'; | 496 toString() => _name == null ? 'CHARCODE($_predicate)' : 'CHARCODE($_name)'; |
| 462 } | 497 } |
| 463 | 498 |
| 464 class _AnyCharRule extends _Rule { | 499 class _AnyCharRule extends _Rule { |
| 465 const _AnyCharRule(); | 500 const _AnyCharRule(); |
| 466 _match(_ParserState state, int pos) { | 501 _match(_ParserState state, int pos) { |
| 467 if (pos == state._end) | 502 if (pos == state._end) return null; |
| 468 return null; | |
| 469 return [pos + 1, null]; | 503 return [pos + 1, null]; |
| 470 } | 504 } |
| 471 | 505 |
| 472 toString() => 'CHAR()'; | 506 toString() => 'CHAR()'; |
| 473 } | 507 } |
| 474 | 508 |
| 475 class _SymbolRule extends _Rule { | 509 class _SymbolRule extends _Rule { |
| 476 final Symbol _symbol; | 510 final Symbol _symbol; |
| 477 _SymbolRule(Symbol this._symbol); | 511 _SymbolRule(Symbol this._symbol); |
| 478 _match(_ParserState state, int pos) { | 512 _match(_ParserState state, int pos) { |
| 479 if (_symbol._rule == null) | 513 if (_symbol._rule == null) |
| 480 throw new Exception("Symbol '${_symbol.name}' is undefined"); | 514 throw new Exception("Symbol '${_symbol.name}' is undefined"); |
| 481 return _symbol._rule.match(state, pos); | 515 return _symbol._rule.match(state, pos); |
| 482 } | 516 } |
| 483 | 517 |
| 484 bool get generatesValue => true; | 518 bool get generatesValue => true; |
| 485 | 519 |
| 486 toString() => '<${_symbol.name}>'; | 520 toString() => '<${_symbol.name}>'; |
| 487 } | 521 } |
| 488 | 522 |
| 489 class _SkipRule extends _Rule { | 523 class _SkipRule extends _Rule { |
| 490 // A rule that has no value. | 524 // A rule that has no value. |
| 491 _Rule _rule; | 525 _Rule _rule; |
| 492 _SkipRule(_Rule this._rule); | 526 _SkipRule(_Rule this._rule); |
| 493 _match(_ParserState state, int pos) { | 527 _match(_ParserState state, int pos) { |
| 494 var match = _rule.matchAfterWS(state, pos); | 528 var match = _rule.matchAfterWS(state, pos); |
| 495 if (match == null) | 529 if (match == null) return null; |
| 496 return null; | |
| 497 return [match[0], null]; | 530 return [match[0], null]; |
| 498 } | 531 } |
| 499 | 532 |
| 500 toString() => 'TOKEN($_rule)'; | 533 toString() => 'TOKEN($_rule)'; |
| 501 } | 534 } |
| 502 | 535 |
| 503 class _StringRule extends _Rule implements _Expectable { | 536 class _StringRule extends _Rule implements _Expectable { |
| 504 final String _string; | 537 final String _string; |
| 505 int _len; | 538 int _len; |
| 506 _StringRule(this._string) { | 539 _StringRule(this._string) { |
| 507 _len = _string.length; | 540 _len = _string.length; |
| 508 } | 541 } |
| 509 | 542 |
| 510 _match(_ParserState state, int pos) { | 543 _match(_ParserState state, int pos) { |
| 511 if (pos + _len > state._end) | 544 if (pos + _len > state._end) return null; |
| 512 return null; | |
| 513 for (int i = 0; i < _len; i++) { | 545 for (int i = 0; i < _len; i++) { |
| 514 if (state._text.codeUnitAt(pos + i) != _string.codeUnitAt(i)) | 546 if (state._text.codeUnitAt(pos + i) != _string.codeUnitAt(i)) return null; |
| 515 return null; | |
| 516 } | 547 } |
| 517 return [pos + _len, null]; | 548 return [pos + _len, null]; |
| 518 } | 549 } |
| 519 | 550 |
| 520 //get defaultValue => _string; | 551 //get defaultValue => _string; |
| 521 | 552 |
| 522 toString() => '"$_string"'; | 553 toString() => '"$_string"'; |
| 523 | 554 |
| 524 description() => "'$_string'"; | 555 description() => "'$_string'"; |
| 525 } | 556 } |
| (...skipping 23 matching lines...) Expand all Loading... |
| 549 return match; | 580 return match; |
| 550 } | 581 } |
| 551 | 582 |
| 552 toString() => _name; | 583 toString() => _name; |
| 553 | 584 |
| 554 description() => _name == null ? '?' : _name; | 585 description() => _name == null ? '?' : _name; |
| 555 } | 586 } |
| 556 | 587 |
| 557 class _TextValueRule extends _Rule { | 588 class _TextValueRule extends _Rule { |
| 558 final _Rule _rule; | 589 final _Rule _rule; |
| 559 final _extract; // Function | 590 final _extract; // Function |
| 560 | 591 |
| 561 _TextValueRule(_Rule this._rule, Function this._extract); | 592 _TextValueRule(_Rule this._rule, Function this._extract); |
| 562 | 593 |
| 563 _match(_ParserState state, int pos) { | 594 _match(_ParserState state, int pos) { |
| 564 var match = _rule.matchAfterWS(state, pos); | 595 var match = _rule.matchAfterWS(state, pos); |
| 565 if (match == null) { | 596 if (match == null) { |
| 566 return null; | 597 return null; |
| 567 } | 598 } |
| 568 var endPos = match[0]; | 599 var endPos = match[0]; |
| 569 return [endPos, _extract(state._text, pos, endPos)]; | 600 return [endPos, _extract(state._text, pos, endPos)]; |
| 570 } | 601 } |
| 571 | 602 |
| 572 bool get generatesValue => true; | 603 bool get generatesValue => true; |
| 573 | 604 |
| 574 toString() => 'TEXT($_rule)'; | 605 toString() => 'TEXT($_rule)'; |
| 575 } | 606 } |
| 576 | 607 |
| 577 _Rule _compileMultiRule(List rules, | 608 _Rule _compileMultiRule( |
| 578 bool allowReducer, | 609 List rules, bool allowReducer, finish(compiledRules, valueCount, reducer)) { |
| 579 finish(compiledRules, valueCount, reducer)) { | |
| 580 int valueCount = 0; | 610 int valueCount = 0; |
| 581 List compiledRules = new List<_Rule>(); | 611 List compiledRules = new List<_Rule>(); |
| 582 Function reducer; | 612 Function reducer; |
| 583 for (var rule in rules) { | 613 for (var rule in rules) { |
| 584 if (reducer != null) | 614 if (reducer != null) |
| 585 throw new Exception('Reducer must be last in sequence: $rule'); | 615 throw new Exception('Reducer must be last in sequence: $rule'); |
| 586 if (rule is Function) { | 616 if (rule is Function) { |
| 587 if (allowReducer) | 617 if (allowReducer) |
| 588 reducer = rule; | 618 reducer = rule; |
| 589 else | 619 else |
| 590 throw new Exception('Bad rule: "$rule"'); | 620 throw new Exception('Bad rule: "$rule"'); |
| 591 } else { | 621 } else { |
| 592 _Rule compiledRule = _compile(rule); | 622 _Rule compiledRule = _compile(rule); |
| 593 if (compiledRule.generatesValue) | 623 if (compiledRule.generatesValue) ++valueCount; |
| 594 ++valueCount; | |
| 595 compiledRules.add(compiledRule); | 624 compiledRules.add(compiledRule); |
| 596 } | 625 } |
| 597 } | 626 } |
| 598 return finish(compiledRules, valueCount, reducer); | 627 return finish(compiledRules, valueCount, reducer); |
| 599 } | 628 } |
| 600 | 629 |
| 601 String _formatMultiRule(String functor, List rules) { | 630 String _formatMultiRule(String functor, List rules) { |
| 602 var sb = new StringBuffer(functor); | 631 var sb = new StringBuffer(functor); |
| 603 sb.write('('); | 632 sb.write('('); |
| 604 var separator = ''; | 633 var separator = ''; |
| 605 for (var rule in rules) { | 634 for (var rule in rules) { |
| 606 sb.write(separator); | 635 sb.write(separator); |
| 607 sb.write(rule); | 636 sb.write(rule); |
| 608 separator = ','; | 637 separator = ','; |
| 609 } | 638 } |
| 610 sb.write(')'); | 639 sb.write(')'); |
| 611 return sb.toString(); | 640 return sb.toString(); |
| 612 } | 641 } |
| 613 | 642 |
| 614 class _SequenceRule extends _Rule { | 643 class _SequenceRule extends _Rule { |
| 615 // This rule matches the component rules in order. | 644 // This rule matches the component rules in order. |
| 616 final List<_Rule> _rules; | 645 final List<_Rule> _rules; |
| 617 final int _generatingSubRules; | 646 final int _generatingSubRules; |
| 618 final Function _reducer; | 647 final Function _reducer; |
| 619 bool _generatesValue; | 648 bool _generatesValue; |
| 620 _SequenceRule(List<_Rule> this._rules, | 649 _SequenceRule(List<_Rule> this._rules, int this._generatingSubRules, |
| 621 int this._generatingSubRules, | 650 Function this._reducer) { |
| 622 Function this._reducer) { | |
| 623 _generatesValue = _generatingSubRules > 0 || _reducer != null; | 651 _generatesValue = _generatingSubRules > 0 || _reducer != null; |
| 624 } | 652 } |
| 625 | 653 |
| 626 _match(state, pos) { | 654 _match(state, pos) { |
| 627 var sequence = []; | 655 var sequence = []; |
| 628 for (var rule in _rules) { | 656 for (var rule in _rules) { |
| 629 var match = rule.match(state, pos); | 657 var match = rule.match(state, pos); |
| 630 if (match == null) | 658 if (match == null) return null; |
| 631 return null; | |
| 632 if (rule.generatesValue) { | 659 if (rule.generatesValue) { |
| 633 var ast = match[1]; | 660 var ast = match[1]; |
| 634 sequence.add(ast); | 661 sequence.add(ast); |
| 635 } | 662 } |
| 636 pos = match[0]; | 663 pos = match[0]; |
| 637 } | 664 } |
| 638 if (_reducer == null) { | 665 if (_reducer == null) { |
| 639 if (_generatingSubRules == 0) | 666 if (_generatingSubRules == 0) return [pos, null]; |
| 640 return [pos, null]; | 667 if (_generatingSubRules == 1) return [pos, sequence[0]]; |
| 641 if (_generatingSubRules == 1) | |
| 642 return [pos, sequence[0]]; | |
| 643 return [pos, sequence]; | 668 return [pos, sequence]; |
| 644 } else { | 669 } else { |
| 645 return [pos, _apply(_reducer, sequence)]; | 670 return [pos, _apply(_reducer, sequence)]; |
| 646 } | 671 } |
| 647 } | 672 } |
| 648 | 673 |
| 649 bool get generatesValue => _generatesValue; | 674 bool get generatesValue => _generatesValue; |
| 650 | 675 |
| 651 toString() => _formatMultiRule('SEQ', _rules); | 676 toString() => _formatMultiRule('SEQ', _rules); |
| 652 } | 677 } |
| (...skipping 23 matching lines...) Expand all Loading... |
| 676 bool get generatesValue => true; | 701 bool get generatesValue => true; |
| 677 | 702 |
| 678 toString() => _formatMultiRule('OR', _rules); | 703 toString() => _formatMultiRule('OR', _rules); |
| 679 } | 704 } |
| 680 | 705 |
| 681 class _OptionalRule extends _Rule { | 706 class _OptionalRule extends _Rule { |
| 682 _Rule _rule; | 707 _Rule _rule; |
| 683 _OptionalRule(_Rule this._rule); | 708 _OptionalRule(_Rule this._rule); |
| 684 _match(_ParserState state, int pos) { | 709 _match(_ParserState state, int pos) { |
| 685 var match = _rule.match(state, pos); | 710 var match = _rule.match(state, pos); |
| 686 if (_rule.generatesValue) | 711 if (_rule.generatesValue) return match == null ? [pos, null] : match; |
| 687 return match == null | 712 return match == null ? [pos, false] : [match[0], true]; |
| 688 ? [pos, null] | |
| 689 : match; | |
| 690 return match == null | |
| 691 ? [pos, false] | |
| 692 : [match[0], true]; | |
| 693 } | 713 } |
| 694 | 714 |
| 695 bool get generatesValue => true; | 715 bool get generatesValue => true; |
| 696 | 716 |
| 697 toString() => 'MAYBE($_rule)'; | 717 toString() => 'MAYBE($_rule)'; |
| 698 } | 718 } |
| 699 | 719 |
| 700 class _ContextRule extends _Rule { | 720 class _ContextRule extends _Rule { |
| 701 _Rule _rule; | 721 _Rule _rule; |
| 702 _ContextRule(_Rule this._rule); | 722 _ContextRule(_Rule this._rule); |
| 703 _match(_ParserState state, int pos) { | 723 _match(_ParserState state, int pos) { |
| 704 // TODO: protect error state. | 724 // TODO: protect error state. |
| 705 var match = _rule._match(state, pos); | 725 var match = _rule._match(state, pos); |
| 706 if (match == null) | 726 if (match == null) return null; |
| 707 return null; | |
| 708 return [pos, null]; | 727 return [pos, null]; |
| 709 } | 728 } |
| 710 | 729 |
| 711 toString() => 'AT($_rule)'; | 730 toString() => 'AT($_rule)'; |
| 712 } | 731 } |
| 713 | 732 |
| 714 class _NegativeContextRule extends _Rule { | 733 class _NegativeContextRule extends _Rule { |
| 715 _Rule _rule; | 734 _Rule _rule; |
| 716 _NegativeContextRule(_Rule this._rule); | 735 _NegativeContextRule(_Rule this._rule); |
| 717 _match(_ParserState state, int pos) { | 736 _match(_ParserState state, int pos) { |
| 718 // TODO: protect error state. | 737 // TODO: protect error state. |
| 719 var match = _rule._match(state, pos); | 738 var match = _rule._match(state, pos); |
| 720 if (match == null) | 739 if (match == null) return [pos, null]; |
| 721 return [pos, null]; | |
| 722 return null; | 740 return null; |
| 723 } | 741 } |
| 724 | 742 |
| 725 toString() => 'NOT($_rule)'; | 743 toString() => 'NOT($_rule)'; |
| 726 } | 744 } |
| 727 | 745 |
| 728 class _RepeatRule extends _Rule { | 746 class _RepeatRule extends _Rule { |
| 729 // Matches zero, one or more items. | 747 // Matches zero, one or more items. |
| 730 _Rule _rule; | 748 _Rule _rule; |
| 731 _Rule _separator; | 749 _Rule _separator; |
| 732 int _min; | 750 int _min; |
| 733 | 751 |
| 734 _RepeatRule(this._rule, this._separator, this._min); | 752 _RepeatRule(this._rule, this._separator, this._min); |
| 735 | 753 |
| 736 _match(state, pos) { | 754 _match(state, pos) { |
| 737 // First match. | 755 // First match. |
| 738 var match = _rule.match(state, pos); | 756 var match = _rule.match(state, pos); |
| 739 if (match == null) | 757 if (match == null) if (_min == 0) |
| 740 if (_min == 0) | 758 return [pos, []]; |
| 741 return [pos, []]; | 759 else |
| 742 else | 760 return null; |
| 743 return null; | |
| 744 pos = match[0]; | 761 pos = match[0]; |
| 745 var result = [match[1]]; | 762 var result = [match[1]]; |
| 746 | 763 |
| 747 // Subsequent matches: | 764 // Subsequent matches: |
| 748 while (true) { | 765 while (true) { |
| 749 var newPos = pos; | 766 var newPos = pos; |
| 750 if (_separator != null) { | 767 if (_separator != null) { |
| 751 match = _separator.match(state, pos); | 768 match = _separator.match(state, pos); |
| 752 if (match == null) | 769 if (match == null) return [pos, result]; |
| 753 return [pos, result]; | |
| 754 newPos = match[0]; | 770 newPos = match[0]; |
| 755 } | 771 } |
| 756 match = _rule.match(state, newPos); | 772 match = _rule.match(state, newPos); |
| 757 if (match == null) | 773 if (match == null) return [pos, result]; |
| 758 return [pos, result]; | |
| 759 pos = match[0]; | 774 pos = match[0]; |
| 760 result.add(match[1]); | 775 result.add(match[1]); |
| 761 } | 776 } |
| 762 } | 777 } |
| 763 | 778 |
| 764 bool get generatesValue => true; | 779 bool get generatesValue => true; |
| 765 | 780 |
| 766 toString() => 'MANY(min:$_min, $_rule${_separator==null?'':", sep: $_separator
"})'; | 781 toString() => |
| 782 'MANY(min:$_min, $_rule${_separator==null?'':", sep: $_separator"})'; |
| 767 } | 783 } |
| 768 | 784 |
| 769 class _MemoRule extends _Rule { | 785 class _MemoRule extends _Rule { |
| 770 final _Rule _rule; | 786 final _Rule _rule; |
| 771 | 787 |
| 772 var parseInstance; | 788 var parseInstance; |
| 773 | 789 |
| 774 // A map from position to result. Can this be replaced with something | 790 // A map from position to result. Can this be replaced with something |
| 775 // smaller? | 791 // smaller? |
| 776 // TODO: figure out how to discard the map and parseInstance after parsing. | 792 // TODO: figure out how to discard the map and parseInstance after parsing. |
| 777 Map<int,Object> map; | 793 Map<int, Object> map; |
| 778 | 794 |
| 779 _MemoRule(this._rule); | 795 _MemoRule(this._rule); |
| 780 | 796 |
| 781 _match(state, pos) { | 797 _match(state, pos) { |
| 782 // See if we are still parsing the same input. Relies on the fact that the | 798 // See if we are still parsing the same input. Relies on the fact that the |
| 783 // input is a string and strings are immutable. | 799 // input is a string and strings are immutable. |
| 784 if (!identical(parseInstance, state._text)) { | 800 if (!identical(parseInstance, state._text)) { |
| 785 map = new Map<int,Object>(); | 801 map = new Map<int, Object>(); |
| 786 parseInstance = state._text; | 802 parseInstance = state._text; |
| 787 } | 803 } |
| 788 // TODO: does this have to check or preserve parse state (like | 804 // TODO: does this have to check or preserve parse state (like |
| 789 // inWhitespaceMode, error position info etc?) | 805 // inWhitespaceMode, error position info etc?) |
| 790 // Stored result can be null (memoized failure). | 806 // Stored result can be null (memoized failure). |
| 791 if (map.containsKey(pos)) { | 807 if (map.containsKey(pos)) { |
| 792 return map[pos]; | 808 return map[pos]; |
| 793 } | 809 } |
| 794 var match = _rule.match(state, pos); | 810 var match = _rule.match(state, pos); |
| 795 map[pos] = match; | 811 map[pos] = match; |
| 796 return match; | 812 return match; |
| 797 } | 813 } |
| 798 | 814 |
| 799 bool get generatesValue => _rule.generatesValue; | 815 bool get generatesValue => _rule.generatesValue; |
| 800 | 816 |
| 801 toString() => 'MEMO($_rule)'; | 817 toString() => 'MEMO($_rule)'; |
| 802 } | 818 } |
| 803 | 819 |
| 804 _apply(fn, List args) { | 820 _apply(fn, List args) { |
| 805 switch (args.length) { | 821 switch (args.length) { |
| 806 case 0: return fn(); | 822 case 0: |
| 807 case 1: return fn(args[0]); | 823 return fn(); |
| 808 case 2: return fn(args[0], args[1]); | 824 case 1: |
| 809 case 3: return fn(args[0], args[1], args[2]); | 825 return fn(args[0]); |
| 810 case 4: return fn(args[0], args[1], args[2], args[3]); | 826 case 2: |
| 811 case 5: return fn(args[0], args[1], args[2], args[3], args[4]); | 827 return fn(args[0], args[1]); |
| 812 case 6: return fn(args[0], args[1], args[2], args[3], args[4], | 828 case 3: |
| 813 args[5]); | 829 return fn(args[0], args[1], args[2]); |
| 814 case 7: return fn(args[0], args[1], args[2], args[3], args[4], | 830 case 4: |
| 815 args[5], args[6]); | 831 return fn(args[0], args[1], args[2], args[3]); |
| 816 case 8: return fn(args[0], args[1], args[2], args[3], args[4], | 832 case 5: |
| 817 args[5], args[6], args[7]); | 833 return fn(args[0], args[1], args[2], args[3], args[4]); |
| 818 case 9: return fn(args[0], args[1], args[2], args[3], args[4], | 834 case 6: |
| 819 args[5], args[6], args[7], args[8]); | 835 return fn(args[0], args[1], args[2], args[3], args[4], args[5]); |
| 820 case 10: return fn(args[0], args[1], args[2], args[3], args[4], | 836 case 7: |
| 821 args[5], args[6], args[7], args[8], args[9]); | 837 return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6]); |
| 838 case 8: |
| 839 return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6], |
| 840 args[7]); |
| 841 case 9: |
| 842 return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6], |
| 843 args[7], args[8]); |
| 844 case 10: |
| 845 return fn(args[0], args[1], args[2], args[3], args[4], args[5], args[6], |
| 846 args[7], args[8], args[9]); |
| 822 | 847 |
| 823 default: | 848 default: |
| 824 throw new Exception('Too many arguments in _apply: $args'); | 849 throw new Exception('Too many arguments in _apply: $args'); |
| 825 } | 850 } |
| 826 } | 851 } |
| 827 | 852 |
| 828 List _unspread(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z) { | 853 List _unspread(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, |
| 854 w, x, y, z) { |
| 829 List list = new List(); | 855 List list = new List(); |
| 830 add(element) { if (element != null) list.add(element); } | 856 add(element) { |
| 857 if (element != null) list.add(element); |
| 858 } |
| 859 |
| 831 add(a); | 860 add(a); |
| 832 add(b); | 861 add(b); |
| 833 add(c); | 862 add(c); |
| 834 add(d); | 863 add(d); |
| 835 add(e); | 864 add(e); |
| 836 add(f); | 865 add(f); |
| 837 add(g); | 866 add(g); |
| 838 add(h); | 867 add(h); |
| 839 add(i); | 868 add(i); |
| 840 add(j); | 869 add(j); |
| 841 add(k); | 870 add(k); |
| 842 add(l); | 871 add(l); |
| 843 add(m); | 872 add(m); |
| 844 add(n); | 873 add(n); |
| 845 add(o); | 874 add(o); |
| 846 add(p); | 875 add(p); |
| 847 add(q); | 876 add(q); |
| 848 add(r); | 877 add(r); |
| 849 add(s); | 878 add(s); |
| 850 add(t); | 879 add(t); |
| 851 add(u); | 880 add(u); |
| 852 add(v); | 881 add(v); |
| 853 add(w); | 882 add(w); |
| 854 add(x); | 883 add(x); |
| 855 add(y); | 884 add(y); |
| 856 add(z); | 885 add(z); |
| 857 return list; | 886 return list; |
| 858 } | 887 } |
| OLD | NEW |