Index: utils/peg/pegparser_test.dart |
diff --git a/utils/peg/pegparser_test.dart b/utils/peg/pegparser_test.dart |
new file mode 100644 |
index 0000000000000000000000000000000000000000..98cd3dc2605a5b1d6cb7e9c32169113859250356 |
--- /dev/null |
+++ b/utils/peg/pegparser_test.dart |
@@ -0,0 +1,346 @@ |
+// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+ |
+#import('pegparser.dart'); |
+ |
+testParens() { |
+ Grammar g = new Grammar(); |
+ Symbol a = g['A']; |
+ |
+ a.def = ['(', MANY(a, min:0), ')', (a) => a]; |
+ |
+ check(g, a, "", null); |
+ check(g, a, "()", '[]'); |
+ check(g, a, "(()())", '[[],[]]'); |
+ check(g, a, "(()((()))())", '[[],[[[]]],[]]'); |
+} |
+ |
+testBlockComment() { |
+ |
+ // Block comment in whitespace. |
+ |
+ Grammar g = new Grammar(); |
+ Symbol blockComment = g['blockComment']; |
+ |
+ blockComment.def = |
+ ['/*', |
+ MANY(OR([blockComment, |
+ [NOT('*/'), CHAR()], |
+ [END, ERROR('EOF in block comment')] |
+ ]), |
+ min: 0), |
+ '*/']; |
+ print(blockComment); |
+ |
+ var a = MANY(TEXT('x')); |
+ |
+ g.whitespace = OR([g.whitespace, blockComment]); |
+ |
+ check(g, a, "x /**/ x", '[x,x]'); |
+ check(g, a, "x /*/**/*/ x", '[x,x]'); |
+ check(g, a, "x /*/***/ x", 'EOF in block comment'); |
+ check(g, a, "x /*/*/x**/**/ x", '[x,x]'); |
+ |
+ check(g, a, @""" |
+/* Comment */ |
+/* Following comment with /* nested comment*/ */ |
+x |
+/* x in comment */ |
+x /* outside comment */ |
+""", |
+ '[x,x]'); |
+} |
+ |
+testTEXT() { |
+ Grammar g = new Grammar(); |
+ |
+ // TEXT grabs the parsed text, |
+ check(g, TEXT(LEX(MANY(OR(['1','a'])))), ' 1a1 ', '1a1'); |
+ |
+ // Without the lexical context, TEXT will grab intervening whitespace. |
+ check(g, TEXT(MANY(OR(['1','a']))), ' 1a1 ', '1a1'); |
+ check(g, TEXT(MANY(OR(['1','a']))), ' 1 a 1 ', '1 a 1'); |
+ |
+ // Custom processing of the TEXT substring. |
+ var binaryNumber = |
+ TEXT(LEX(MANY(OR(['0','1']))), |
+ (str, start, end) { |
+ var r = 0; |
+ var zero = '0'.charCodeAt(0); |
+ for (int i = start; i < end; i++) |
+ r = r * 2 + (str.charCodeAt(i) - zero); |
+ return r; |
+ }); |
+ |
+ check(g, binaryNumber, ' 10101 ', 21); |
+ check(g, binaryNumber, '1010111', 87); |
+ check(g, binaryNumber, '1010 111', null); |
+} |
+ |
+testOR() { |
+ // OR matches the first match. |
+ Grammar g = new Grammar(); |
+ check(g, OR([['a', NOT(END), () => 1], |
+ ['a', () => 2], |
+ ['a', () => 3]]), |
+ 'a', 2); |
+} |
+ |
+testCODE() { |
+ Grammar g = new Grammar(); |
+ var a = TEXT(LEX('thing', MANY(CHAR('bcd')))); |
+ |
+ check(g, a, 'bbb', 'bbb'); |
+ check(g, a, 'ccc', 'ccc'); |
+ check(g, a, 'ddd', 'ddd'); |
+ check(g, a, 'bad', null); // a is outside range. |
+ check(g, a, 'bed', null); // e is outside range. |
+} |
+ |
+testC() { |
+ // Curried tree builders. |
+ binary(operation) => (second) => (first) => [operation, first, second]; |
+ unary(operation) => () => (first) => [operation, first]; |
+ reform(a, fns) { |
+ var r = a; |
+ for (var fn in fns) |
+ r = fn(r); |
+ return r; |
+ } |
+ |
+ Grammar g = new Grammar(); |
+ |
+ Symbol expression = g['expression']; |
+ Symbol postfix_e = g['postfix_e']; |
+ Symbol unary_e = g['unary_e']; |
+ Symbol cast_e = g['cast_e']; |
+ Symbol mult_e = g['mult_e']; |
+ Symbol add_e = g['add_e']; |
+ Symbol shift_e = g['shift_e']; |
+ Symbol relational_e = g['relational_e']; |
+ Symbol equality_e = g['equality_e']; |
+ Symbol cond_e = g['cond_e']; |
+ Symbol assignment_e = g['assignment_e']; |
+ |
+ // Lexical elements. |
+ var idStartChar = CHAR( |
+ @"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); |
+ var idNextChar = CHAR( |
+ @"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789$_"); |
+ |
+ var id = TEXT(LEX('identifier', [idStartChar, MANY(idNextChar, min: 0)])); |
+ |
+ var lit = TEXT(LEX('literal', MANY(CHAR('0123456789')))); |
+ |
+ |
+ var type_name = id; |
+ |
+ |
+ // Expression grammar. |
+ var primary_e = OR([id, |
+ lit, |
+ ['(', expression, ')', (e) => e] |
+ ]); |
+ |
+ var postfixes = OR([['(', MANY(assignment_e, ',', 0), ')', binary('apply')], |
+ ['++', unary('postinc')], |
+ ['--', unary('postdec')], |
+ ['.', id, binary('field')], |
+ ['->', id, binary('ptr')], |
+ ]); |
+ |
+ postfix_e.def = [primary_e, MANY(postfixes, min:0), reform]; |
+ |
+ |
+ var unary_op = OR([['&', () => 'address'], |
+ ['*', () => 'indir'], |
+ ['!', () => 'not'], |
+ ['~', () => 'not'], |
+ ['-', () => 'negate'], |
+ ['+', () => 'uplus'], |
+ ]); |
+ var sizeof = LEX('sizeof', ['sizeof', NOT(idNextChar)]); |
+ |
+ Symbol unary_e_plain = g['unary_e_plain']; |
+ unary_e_plain.def = |
+ OR([ ['++', unary_e, (e) => ['preinc', e]], |
+ ['--', unary_e, (e) => ['predec', e]], |
+ [unary_op, cast_e, (o, e) => [o, e]], |
+ [sizeof, unary_e, (e) => ['sizeof-expr', e]], |
+ [sizeof, '(', type_name , ')', (t) => ['sizeof-type', t]], |
+ postfix_e |
+ ]); |
+ |
+ unary_e.def = MEMO(unary_e_plain); |
+ //unary_e.def = unary_e_plain; |
+ |
+ cast_e.def = OR([ ['(', type_name, ')', cast_e, (t, e) => ['cast', t, e]], |
+ unary_e, |
+ ]); |
+ |
+ var mult_ops = OR([['*', cast_e, binary('mult')], |
+ ['/', cast_e, binary('div')], |
+ ['%', cast_e, binary('rem')], |
+ ]); |
+ mult_e.def = [cast_e, MANY(mult_ops, min:0), reform]; |
+ |
+ var add_ops = OR([['+', mult_e, binary('add')], |
+ ['-', mult_e, binary('sub')], |
+ ]); |
+ add_e.def = [mult_e, MANY(add_ops, min:0), reform]; |
+ |
+ var shift_ops = OR([['>>', add_e, binary('shl')], |
+ ['<<', add_e, binary('shr')], |
+ ]); |
+ shift_e.def = [add_e, MANY(shift_ops, min:0), reform]; |
+ |
+ var relational_ops = OR([['<=', shift_e, binary('le')], |
+ ['>=', shift_e, binary('ge')], |
+ ['<', shift_e, binary('lt')], |
+ ['>', shift_e, binary('gt')], |
+ ]); |
+ relational_e.def = [shift_e, MANY(relational_ops, min:0), reform]; |
+ |
+ |
+ var equality_ops = OR([['==', shift_e, binary('eq')], |
+ ['!=', shift_e, binary('ne')], |
+ ]); |
+ equality_e.def = [relational_e, MANY(equality_ops, min:0), reform]; |
+ |
+ |
+ var bit_and_op = LEX('&', ['&', NOT('&')]); // Don't see '&&' and '&', '&' |
+ var bit_or_op = LEX('|', ['|', NOT('|')]); |
+ |
+ var and_e = [equality_e, MANY([bit_and_op, equality_e, binary('bitand')], min:0), reform]; |
+ var xor_e = [and_e, MANY(['^', and_e, binary('bitxor')], min:0), reform]; |
+ var or_e = [xor_e, MANY([bit_or_op, xor_e, binary('bitor')], min:0), reform]; |
+ |
+ var log_and_e = [or_e, MANY(['&&', or_e, binary('and')], min:0), reform]; |
+ |
+ var log_or_e = [log_and_e, MANY(['||', log_and_e, binary('or')], min:0), reform]; |
+ |
+ //cond_e.def = OR([ [log_or_e, '?', expression, ':', cond_e, |
+ // (p,a,b) => ['cond', p, a, b]], |
+ // log_or_e]); |
+ // Alternate version avoids reparsing log_or_e. |
+ cond_e.def = [log_or_e, MAYBE(['?', expression, ':', cond_e]), |
+ (p, r) => r == null || r == false ? p : ['cond', p, r[0], r[1]]]; |
+ |
+ var assign_op = OR([['*=', () => 'mulassign'], |
+ ['=', () => 'assign']]); |
+ |
+ // TODO: Figure out how not to re-parse a unary_e. |
+ // Order matters - cond_e can't go first since cond_e will succeed on, e.g. 'a'. |
+ assignment_e.def = OR([[unary_e, assign_op, assignment_e, |
+ (u, op, a) => [op, u, a]], |
+ cond_e]); |
+ |
+ expression.def = [assignment_e, |
+ MANY([',', assignment_e, binary('comma')], min:0), |
+ reform]; |
+ |
+ show(g, expression, 'a'); |
+ check(g, expression, 'a', 'a'); |
+ check(g, expression, '(a)', 'a'); |
+ check(g, expression, ' ( ( a ) ) ', 'a'); |
+ |
+ check(g, expression, 'a(~1,2)', '[apply,a,[[not,1],2]]'); |
+ check(g, expression, 'a(1)(x,2)', '[apply,[apply,a,[1]],[x,2]]'); |
+ check(g, expression, 'a(1,2())', '[apply,a,[1,[apply,2,[]]]]'); |
+ |
+ check(g, expression, '++a++', '[preinc,[postinc,a]]'); |
+ check(g, expression, 'a++++b', null); |
+ check(g, expression, 'a++ ++b', null); |
+ check(g, expression, 'a+ +++b', '[add,a,[preinc,[uplus,b]]]'); |
+ check(g, expression, 'a+ + ++b', '[add,a,[uplus,[preinc,b]]]'); |
+ check(g, expression, 'a+ + + +b', '[add,a,[uplus,[uplus,[uplus,b]]]]'); |
+ check(g, expression, 'a+ ++ +b', '[add,a,[preinc,[uplus,b]]]'); |
+ check(g, expression, 'a++ + +b', '[add,[postinc,a],[uplus,b]]'); |
+ check(g, expression, 'a+++ +b', '[add,[postinc,a],[uplus,b]]'); |
+ |
+ check(g, expression, '((T)f)(x)', '[apply,[cast,T,f],[x]]'); |
+ check(g, expression, '(T)f(x)', '[cast,T,[apply,f,[x]]]'); |
+ |
+ check(g, expression, 'a++*++b', '[mult,[postinc,a],[preinc,b]]'); |
+ |
+ check(g, expression, 'a<<1>>++b', '[shl,[shr,a,1],[preinc,b]]'); |
+ |
+ check(g, expression, 'a<1&&b', '[and,[lt,a,1],b]'); |
+ |
+ check(g, expression, 'a<1 & &b', '[bitand,[lt,a,1],[address,b]]'); |
+ check(g, expression, |
+ 'a ? b ? c : d : e ? f : g', |
+ '[cond,a,[cond,b,c,d],[cond,e,f,g]]'); |
+ |
+ check(g, expression, 'a,b,c', '[comma,[comma,a,b],c]'); |
+ check(g, expression, 'a=1,b,c', '[comma,[comma,[assign,a,1],b],c]'); |
+ |
+ check(g, expression, |
+ '((((((((((((a))))))))))))=1,b,c', '[comma,[comma,[assign,a,1],b],c]'); |
+ |
+ check(g, expression, 'sizeof a', '[sizeof-expr,a]'); |
+ check(g, expression, 'sizeofa', 'sizeofa'); |
+ check(g, expression, 'sizeof (a)', '[sizeof-expr,a]'); |
+} |
+ |
+ |
+show(grammar, rule, input) { |
+ print('show: "$input"'); |
+ var ast; |
+ try { |
+ ast = grammar.parse(rule, input); |
+ } catch (var exception) { |
+ if (exception is ParseError) |
+ ast = exception; |
+ else |
+ throw; |
+ } |
+ print('${printList(ast)}'); |
+} |
+ |
+void check(grammar, rule, input, expected) { |
+ // If [expected] is String then the result is coerced to string. |
+ // If [expected] is !String, the result is compared directly. |
+ print('check: "$input"'); |
+ var ast; |
+ try { |
+ ast = grammar.parse(rule, input); |
+ } catch (var exception) { |
+ ast = exception; |
+ } |
+ |
+ var formatted = ast; |
+ if (expected is String) |
+ formatted = printList(ast); |
+ |
+ Expect.equals(expected, formatted, "parse: $input"); |
+} |
+ |
+// Prints the list in [1,2,3] notation, including nested lists. |
+void printList(item) { |
+ if (item is List) { |
+ StringBuffer sb = new StringBuffer(); |
+ sb.add('['); |
+ var sep = ''; |
+ for (var x in item) { |
+ sb.add(sep); |
+ sb.add(printList(x)); |
+ sep = ','; |
+ } |
+ sb.add(']'); |
+ return sb.toString(); |
+ } |
+ if (item == null) |
+ return 'null'; |
+ return item.toString(); |
+} |
+ |
+main() { |
+ testCODE(); |
+ testParens(); |
+ testOR(); |
+ testTEXT(); |
+ testBlockComment(); |
+ testC(); |
+} |