Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(356)

Unified Diff: sdk/lib/_internal/compiler/implementation/js/nodes.dart

Issue 12276002: Add a small JS parser to ease the building of ASTs (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Streamline support for var Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: sdk/lib/_internal/compiler/implementation/js/nodes.dart
diff --git a/sdk/lib/_internal/compiler/implementation/js/nodes.dart b/sdk/lib/_internal/compiler/implementation/js/nodes.dart
index 54af9c5f97d00d3306e8822bde0a22985e234859..771955e87f709f270f2efecf233a2bfbf8f82129 100644
--- a/sdk/lib/_internal/compiler/implementation/js/nodes.dart
+++ b/sdk/lib/_internal/compiler/implementation/js/nodes.dart
@@ -932,7 +932,9 @@ class RegExpLiteral extends Expression {
class JsBuilder {
ahe 2013/02/19 10:05:25 How about moving JsBuilder and MiniJsParser to as
erikcorry 2013/02/19 10:38:29 I'd like to move them to a different file (a separ
ahe 2013/02/19 10:47:34 Good point.
const JsBuilder();
- VariableUse operator [](String name) => new VariableUse(name);
+  /// Parses [source] with a [MiniJsParser] and returns the resulting
+  /// expression node.
+  Expression operator [](String source) =>
+      new MiniJsParser(source).expression();
// TODO(ahe): Remove this method.
Binary equals(Expression left, Expression right) {
@@ -1050,3 +1052,302 @@ class JsBuilder {
const JsBuilder js = const JsBuilder();
LiteralString string(String value) => js.string(value);
+
+/// Error thrown by [MiniJsParser] when a snippet cannot be parsed.
+///
+/// Its string form shows the source text, a caret under the start of the
+/// most recently scanned token, and the message.
+class MiniJsParserError {
+  MiniJsParser parser;
+  String message;
+
+  MiniJsParserError(this.parser, this.message);
+
+  String toString() {
+    // One space per source character in front of the last token, so that the
+    // caret and the message line up underneath it.
+    var spaces = new String.fromCharCodes(
+        new List.fixedLength(parser.lastPosition, fill: charCodes.$SPACE));
+    return "Error in MiniJsParser:\n${parser.src}\n$spaces^\n$spaces$message\n";
+  }
+}
+
+/// Mini JavaScript parser for tiny snippets of code that we want to make into
+/// AST nodes. Handles:
+/// * identifiers.
+/// * dot access.
+/// * method calls.
+/// * [] access.
+/// * string, boolean, null and numeric literals (no backslash escapes, no hex).
sra1 2013/02/20 01:50:24 see comment below about backslashes. I'd revise to
+/// * most operators.
+/// * brackets.
+/// * var declarations.
+/// Notable things it can't do yet include:
+/// * operator precedence.
+/// * array and object literals.
+/// * new, throw, return, typeof.
+/// * statements, including any flow control (if, while, for, etc.)
+/// It's a fairly standard recursive descent parser.
+class MiniJsParser {
ahe 2013/02/19 10:05:25 I would call this "JsExpressionParser" or somethin
sra1 2013/02/20 01:50:24 I think the urge to extend to statements will be i
+  /// Creates a parser for [src] and scans its first token.
+  MiniJsParser(this.src)
+      : lastCategory = NONE,
+        lastToken = null,
+        lastPosition = 0,
+        position = 0 {
ahe 2013/02/19 10:05:25 One line per initializer.
+    getSymbol();
+  }
+
+ // Category of the most recently scanned token (one of the constants below);
+ // NONE once the end of [src] has been reached.
+ int lastCategory;
+ // Text of the most recently scanned token; null at the end of [src].
+ String lastToken;
+ // Index in [src] at which the most recently scanned token starts.
+ int lastPosition;
+ // Index in [src] of the first character that has not been scanned yet.
+ int position;
+ // The source text being parsed.
+ String src;
+
+ // Token categories. singleCharCategory treats every value from DOT upwards
+ // as a one-character token, so the numeric order of these constants matters.
+ static const NONE = -1;
+ static const ALPHA = 0;
+ static const NUMERIC = 1;
+ static const STRING = 2;
+ static const SYMBOL = 3;
+ static const RELATION = 4;
+ static const DOT = 5;
+ static const LPAREN = 6;
+ static const RPAREN = 7;
+ static const LSQUARE = 8;
+ static const RSQUARE = 9;
+ static const COMMA = 10;
+ static const OTHER = 11;
+
+  /// Whether tokens of [category] are always a single character long, so that
+  /// for example `]]` is scanned as two tokens rather than one.
+  bool singleCharCategory(int category) {
+    return category >= DOT;
+  }
+
+  /// Returns the name of the token category [cat] for use in error messages,
+  /// or "Unknown: ..." when [cat] is not one of the category constants.
+  static String categoryToString(int cat) {
+    if (cat == NONE) return "NONE";
+    if (cat == ALPHA) return "ALPHA";
+    if (cat == NUMERIC) return "NUMERIC";
+    if (cat == STRING) return "STRING";
+    if (cat == SYMBOL) return "SYMBOL";
+    if (cat == RELATION) return "RELATION";
+    if (cat == DOT) return "DOT";
+    if (cat == LPAREN) return "LPAREN";
+    if (cat == RPAREN) return "RPAREN";
+    if (cat == LSQUARE) return "LSQUARE";
+    if (cat == RSQUARE) return "RSQUARE";
+    if (cat == COMMA) return "COMMA";
+    if (cat == OTHER) return "OTHER";
+    return "Unknown: $cat";
+  }
+
+ // Token category for each code unit from 0 to 126, indexed by code unit.
+ // The trailing comment on each row lists the characters that row covers.
+ static const CATEGORIES = const <int>[
+ OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, // 0-7
+ OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, // 8-15
+ OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, // 16-23
+ OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, OTHER, // 24-31
+ OTHER, RELATION, OTHER, OTHER, ALPHA, SYMBOL, SYMBOL, OTHER, //  !"#$%&'
+ LPAREN, RPAREN, SYMBOL, SYMBOL, COMMA, SYMBOL, DOT, SYMBOL, // ()*+,-./
+ NUMERIC, NUMERIC, NUMERIC, NUMERIC, NUMERIC, // 01234
+ NUMERIC, NUMERIC, NUMERIC, NUMERIC, NUMERIC, // 56789
+ OTHER, OTHER, RELATION, RELATION, RELATION, OTHER, OTHER, // :;<=>?@
+ ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, // ABCDEFGH
+ ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, // IJKLMNOP
+ ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, // QRSTUVWX
+ ALPHA, ALPHA, LSQUARE, OTHER, RSQUARE, SYMBOL, ALPHA, OTHER, // YZ[\]^_`
+ ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, // abcdefgh
+ ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, // ijklmnop
+ ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, ALPHA, // qrstuvwx
+ ALPHA, ALPHA, OTHER, SYMBOL, OTHER, SYMBOL]; // yz{|}~
+
+  /// Returns the token category of the code unit [code]. Code units beyond
+  /// the end of [CATEGORIES] are classified as OTHER.
+  static int category(int code) =>
+      code < CATEGORIES.length ? CATEGORIES[code] : OTHER;
+
+  /// Scans the next token of [src], starting at [position], and records it in
+  /// [lastCategory], [lastToken] and [lastPosition]. At the end of the input
+  /// the category is NONE and the token is null.
+  ///
+  /// Throws a [MiniJsParserError] if a string literal is not terminated.
+  void getSymbol() {
+    // Only the space character is skipped between tokens.
+    while (position < src.length &&
+           src.codeUnitAt(position) == charCodes.$SPACE) {
+      position++;
+    }
+    if (position == src.length) {
+      lastCategory = NONE;
+      lastToken = null;
+      lastPosition = position;
+      return;
+    }
+    int code = src.codeUnitAt(position);
+    lastPosition = position;
+    if (code == charCodes.$SQ || code == charCodes.$DQ) {
+      // String literal: scan to the next occurrence of the opening quote.
+      // Backslash escapes are not interpreted, so the literal ends at the
+      // first matching quote character.
+      do {
+        position++;
+        if (position >= src.length) {
+          // Without this check codeUnitAt would be called past the end of
+          // src and fail with a range error instead of a parser error.
+          throw new MiniJsParserError(this, "Unterminated string");
+        }
+      } while (src.codeUnitAt(position) != code);
ahe 2013/02/19 10:05:25 This doesn't handle \ in strings.
erikcorry 2013/02/19 10:38:29 That's right, that's noted in the description of t
ahe 2013/02/19 10:47:34 You could throw an exception if you see a backslas
sra1 2013/02/20 01:50:24 Noooo! It works pretty well (e.g. is actually used
+      lastCategory = STRING;
+      position++;
+      // The token text includes both quote characters.
+      lastToken = src.substring(lastPosition, position);
+    } else {
+      int cat = category(code);
+      int newCat;
+      // Extend the token while the next character still belongs to it. The
+      // category of the first character stays the category of the token.
+      do {
+        position++;
+        if (position == src.length) break;
+        newCat = category(src.codeUnitAt(position));
+      } while (!singleCharCategory(cat) &&
+               (cat == newCat ||
+                (cat == ALPHA && newCat == NUMERIC) ||  // eg. level42.
+                (cat == NUMERIC && newCat == DOT) ||  // eg. 3.1415
ahe 2013/02/19 10:05:25 I think this will allow things like: 1.1.1.
erikcorry 2013/02/19 10:38:29 Yes, it will. I think my preferred solution is "d
ahe 2013/02/19 10:47:34 You could add code like this after line 1191: if
sra1 2013/02/20 01:50:24 We should avoid putting checks in the printer that
erikcorry 2013/02/20 14:39:09 The printer already performs this check (at least
+                (cat == SYMBOL && newCat == RELATION)));  // eg. +=.
+      lastCategory = cat;
+      lastToken = src.substring(lastPosition, position);
+    }
+  }
+
+  /// Consumes the current token, which must have category [cat]; throws a
+  /// [MiniJsParserError] if it has a different category.
+  void expectCategory(int cat) {
+    if (cat == lastCategory) {
+      getSymbol();
+      return;
+    }
+    throw new MiniJsParserError(this, "Expected ${categoryToString(cat)}");
+  }
+
+  /// Consumes the current token and returns true if it has category [cat];
+  /// otherwise leaves it in place and returns false.
+  bool acceptCategory(int cat) {
+    if (cat != lastCategory) return false;
+    getSymbol();
+    return true;
+  }
+
+  /// Consumes the current token and returns true if its text equals [string];
+  /// otherwise leaves it in place and returns false.
+  bool acceptString(String string) {
+    if (lastToken != string) return false;
+    getSymbol();
+    return true;
+  }
+
+  /// Parses a primary expression: an identifier, one of the literals `true`,
+  /// `false` and `null`, a parenthesized expression, a string literal or a
+  /// numeric literal.
+  ///
+  /// Throws a [MiniJsParserError] if the current token starts none of these.
+  Expression parsePrimary() {
+    // Remember the token text now: a successful accept call below moves on to
+    // the next token.
+    String text = lastToken;
+    if (acceptCategory(ALPHA)) {
+      if (text == "true") return new LiteralBool(true);
+      if (text == "false") return new LiteralBool(false);
+      if (text == "null") return new LiteralNull();
+      return new VariableUse(text);
+    }
+    if (acceptCategory(LPAREN)) {
+      Expression inner = parseExpression();
+      expectCategory(RPAREN);
+      return inner;
+    }
+    if (acceptCategory(STRING)) return new LiteralString(text);
+    if (acceptCategory(NUMERIC)) return new LiteralNumber(text);
+    throw new MiniJsParserError(this, "Expected primary expression");
+  }
+
+  /// Parses a primary expression followed by any number of `.name` and
+  /// `[expression]` property accesses.
+  Expression parseMember() {
+    Expression result = parsePrimary();
+    for (;;) {
+      if (acceptCategory(DOT)) {
+        // Read the name before expectCategory advances past it.
+        String name = lastToken;
+        expectCategory(ALPHA);
+        result = new PropertyAccess.field(result, name);
+        continue;
+      }
+      if (!acceptCategory(LSQUARE)) return result;
+      Expression index = parseExpression();
+      expectCategory(RSQUARE);
+      result = new PropertyAccess(result, index);
+    }
+  }
+
+  /// Parses a member expression optionally followed by one parenthesized,
+  /// comma-separated argument list.
+  Expression parseCall() {
+    Expression target = parseMember();
+    if (!acceptCategory(LPAREN)) return target;
+    final arguments = <Expression>[];
+    if (!acceptCategory(RPAREN)) {
+      arguments.add(parseExpression());
+      // Every further argument must be preceded by a comma.
+      while (!acceptCategory(RPAREN)) {
+        expectCategory(COMMA);
+        arguments.add(parseExpression());
+      }
+    }
+    return new Call(target, arguments);
+  }
+
+  /// Parses call expressions joined by one repeated SYMBOL operator, such as
+  /// `a + b + c`. Also handles postfix `++` and `--`, and compound
+  /// assignments such as `a += b`.
+  ///
+  /// Throws a [MiniJsParserError] if two different operators are mixed.
+  Expression parseBinary() {
+    // Since we don't handle precedence we don't allow two different symbols
+    // without parentheses.
ahe 2013/02/19 10:05:25 Should be "without parentheses". "brackets" is amb
+    Expression result = parseCall();
+    String firstSymbol = lastToken;
+    for (;;) {
+      String symbol = lastToken;
+      if (!acceptCategory(SYMBOL)) return result;
+      if (symbol != firstSymbol) {
+        throw new MiniJsParserError(
+            this, "Mixed $firstSymbol and $symbol operators without ()");
+      }
+      if (symbol == '++' || symbol == '--') {
+        result = new Postfix(symbol, result);
+        continue;
+      }
+      Expression operand = parseCall();
+      if (symbol.endsWith("=")) {
+        // +=, -=, *= etc.: strip the trailing "=" to get the operator.
+        String operation = symbol.substring(0, symbol.length - 1);
+        result = new Assignment.compound(result, operation, operand);
+      } else {
+        result = new Binary(symbol, result, operand);
+      }
+    }
+  }
+
+  /// Parses either a `!` prefix applied to a call or member expression, or a
+  /// binary expression optionally followed by one RELATION operator and a
+  /// second binary expression. The relation `=` yields an [Assignment]; any
+  /// other relation yields a [Binary].
+  Expression parseRelation() {
+    if (acceptString("!")) {
+      // In JavaScript `!` binds tighter than every binary operator, so its
+      // operand is parsed as a call or member expression only. Parsing a
+      // whole binary expression here would silently turn `!a + b` into
+      // `!(a + b)`. Anything more complex must be parenthesized.
+      Expression expression = parseCall();
+      return new Prefix("!", expression);
sra1 2013/02/20 01:50:24 Looks like this silently gets "!a == true" wron
erikcorry 2013/02/20 14:39:09 Done.
+    }
+    Expression lhs = parseBinary();
+    // Read the operator text before acceptCategory advances past it.
+    String relation = lastToken;
+    if (!acceptCategory(RELATION)) return lhs;
+    Expression rhs = parseBinary();
+    if (relation == "=") {
+      return new Assignment(lhs, rhs);
+    }
+    // Regular binary operation.
+    return new Binary(relation, lhs, rhs);
+  }
+
+  /// Parses an expression by delegating to [parseRelation].
+  Expression parseExpression() {
+    return parseRelation();
+  }
+
+  /// Parses either a `var` declaration list, with an optional `= expression`
+  /// initializer on each variable, or a plain expression.
+  Expression parseVarDeclarationOrExpression() {
+    if (!acceptString("var")) return parseExpression();
+    var declarations = [];
+    do {
+      // Read the variable name before expectCategory advances past it.
+      String name = lastToken;
+      expectCategory(ALPHA);
+      // A variable without an initializer gets a null initializer.
+      Expression value = acceptString("=") ? parseExpression() : null;
+      declarations.add(
+          new VariableInitialization(new VariableDeclaration(name), value));
+    } while (acceptCategory(COMMA));
+    return new VariableDeclarationList(declarations);
+  }
+
+  /// Parses the whole of [src] as a var declaration or an expression.
+  ///
+  /// Throws a [MiniJsParserError] if any token is left over after parsing.
+  Expression expression() {
+    Expression expression = parseVarDeclarationOrExpression();
+    // getSymbol sets lastCategory to NONE only at the end of the input, so
+    // any other category means a token was left unconsumed. Additionally
+    // requiring position != src.length would miss junk that is the final
+    // token (as in "a b"), because position is already at the end of the
+    // input once that token has been scanned.
+    if (lastCategory != NONE) {
+      throw new MiniJsParserError(
+          this, "Unparsed junk: ${categoryToString(lastCategory)}");
+    }
+    return expression;
+  }
+}

Powered by Google App Engine
This is Rietveld 408576698