Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(918)

Unified Diff: frog/css/parser_css.dart

Issue 8498020: Beginning of CSS parser using frog parsering infrastructure. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: frog/css/parser_css.dart
diff --git a/frog/css/parser_css.dart b/frog/css/parser_css.dart
new file mode 100644
index 0000000000000000000000000000000000000000..6249aff081a1ac40098d208ac2d533b420f46540
--- /dev/null
+++ b/frog/css/parser_css.dart
@@ -0,0 +1,427 @@
+// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/**
+ * A simple recursive descent parser for CSS selectors.
jimhug 2011/11/09 16:04:03 Old comment.
terry 2011/11/09 21:56:06 Done.
+ */
+class Parser {
+ // If true, throw a CssParserException for any tokenizing/parsing problem.
+ bool _errorsAsException;
+ Tokenizer tokenizer;
+
+ final SourceFile source;
+
+ // TODO(jimhug): 1. Try to kill initializers, if fails, clean this up.
+ bool _inInitializers;
nweiz 2011/11/10 00:04:39 Unused variable.
terry 2011/11/16 14:00:22 Done.
+
+ Token _previousToken;
+ Token _peekToken;
+
+  /**
+   * Creates a parser over [source] whose tokenizer starts at [startOffset],
+   * and loads the first token into the one-token lookahead.
+   */
+  Parser(this.source, [int startOffset = 0]) {
+    // Parser state starts out cleared; errors are printed but not thrown
+    // until a top-level production is asked to throw them.
+    _errorsAsException = false;
+    _inInitializers = false;
+    _previousToken = null;
+
+    // NOTE(review): the meaning of the `true` flag is defined by Tokenizer,
+    // which is not visible here.
+    tokenizer = new Tokenizer(source, true, startOffset);
+    _peekToken = tokenizer.next();
+  }
+
+  /**
+   * Reports an error unless the next token is the end of [source]; the
+   * END_OF_FILE token is consumed when present.
+   */
+  void checkEndOfFile() {
+    if (!_maybeEat(TokenKind.END_OF_FILE)) {
+      _errorExpected(TokenKind.kindToString(TokenKind.END_OF_FILE));
+    }
+  }
+
+  /**
+   * Guard to break out of the parser when an unexpected end of file is found.
+   * If the next token is END_OF_FILE it is consumed, an error is reported and
+   * true is returned; otherwise nothing is consumed and false is returned.
+   */
+  // TODO(jimhug): Failure to call this method can lead to infinite parser
+  // loops. Consider embracing exceptions for more errors to reduce
+  // the danger here.
+  bool isPrematureEndOfFile() {
+    if (!_maybeEat(TokenKind.END_OF_FILE)) return false;
+
+    // NOTE(review): _maybeEat has already advanced the lookahead, so
+    // _peekToken is whatever the tokenizer returned after END_OF_FILE --
+    // confirm this is the span that should be reported.
+    _error('unexpected end of file', _peekToken.span);
+    return true;
+  }
+
+ ///////////////////////////////////////////////////////////////////
+ // Basic support methods
+ ///////////////////////////////////////////////////////////////////
+  /** Returns the kind of the lookahead token without consuming it. */
+  int _peek() => _peekToken.kind;
+
+  /**
+   * Consumes the lookahead token: it becomes the previous token, a fresh
+   * lookahead is pulled from the tokenizer, and the consumed token is
+   * returned.
+   */
+  Token _next() {
+    var consumed = _peekToken;
+    _previousToken = consumed;
+    _peekToken = tokenizer.next();
+    return consumed;
+  }
+
+  /** Returns true if the lookahead token is of [kind]; consumes nothing. */
+  bool _peekKind(int kind) => _peek() == kind;
+
+ /**
+ * Is the next token a legal identifier? This includes pseudo-keywords.
+ * The decision is delegated to TokenKind.isIdentifier; the token is not
+ * consumed.
+ */
+ bool _peekIdentifier() {
+ return TokenKind.isIdentifier(_peekToken.kind);
nweiz 2011/11/10 00:04:39 Why can't you use _peekKind(TokenKind.IDENTIFIER)?
terry 2011/11/16 14:00:22 This is mimicked after frog's parser. On 2011/11/
nweiz 2011/11/23 00:59:44 Frog's parser needs it because there are multiple
+ }
+
+  /**
+   * Consumes the lookahead token and returns true if it is of [kind];
+   * otherwise leaves the token stream untouched and returns false.
+   */
+  bool _maybeEat(int kind) {
+    if (_peekToken.kind != kind) return false;
+
+    // Same bookkeeping as _next(): shift the lookahead into _previousToken
+    // and fetch a new lookahead from the tokenizer.
+    _next();
+    return true;
+  }
+
+  /**
+   * Consumes a token of [kind]. If the lookahead is of a different kind, an
+   * "expected ..." error naming [kind] is reported instead.
+   */
+  void _eat(int kind) {
+    if (_maybeEat(kind)) return;
+    _errorExpected(TokenKind.kindToString(kind));
+  }
+
+ /**
+ * Consumes a SEMICOLON token, reporting an error through _eat if the next
+ * token is anything else.
+ */
+ void _eatSemicolon() {
nweiz 2011/11/10 00:04:39 Unused method.
terry 2011/11/16 14:00:22 Unused now will use later. On 2011/11/10 00:04:39
nweiz 2011/11/23 00:59:44 I don't think it'll be useful, actually... there a
+ _eat(TokenKind.SEMICOLON);
+ }
+
+  /**
+   * Consumes the next token and reports that [expected] was required in its
+   * place, using the consumed token's span as the error location.
+   */
+  void _errorExpected(String expected) {
+    var unexpected = _next();
+    var text;
+    try {
+      text = 'expected $expected, but found $unexpected';
+    } catch (var e) {
+      // Fall back to a message that does not need to stringify the token.
+      text = 'parsing error expected $expected';
+    }
+    _error(text, unexpected.span);
+  }
+
+  /**
+   * Reports a parse error. [message] is printed with a "FATAL: " prefix,
+   * formatted against [location] (defaulting to the lookahead token's span).
+   * When errors-as-exceptions is enabled a CssParserException carrying the
+   * raw [message] and the location is thrown afterwards.
+   */
+  void _error(String message, [SourceSpan location=null]) {
+    var where = (location === null) ? _peekToken.span : location;
+
+    // TODO(terry): Should use below world.fatal:
+    // world.fatal(message, where); // syntax errors are fatal for now
+
+    // TODO(terry): Beginning of temp code until world.fatal enabled.
+    // The lookahead's span may itself be missing, so still guard the format.
+    var text = (where != null) ? where.toMessageString(message) : message;
+    print("FATAL: $text");
+
+    // Any parsing problem throw exception too.
+    if (_errorsAsException) {
+      throw new CssParserException(message, where);
+    }
+    // TODO(terry): End of temp code until world.fatal enabled.
+  }
+
+  /**
+   * Builds a span over [source] running from offset [start] to the end of
+   * the most recently consumed token.
+   */
+  SourceSpan _makeSpan(int start) =>
+      new SourceSpan(source, start, _previousToken.end);
+
+ ///////////////////////////////////////////////////////////////////
+ // Top level productions
+ ///////////////////////////////////////////////////////////////////
+
+  /**
+   * Parses comma separated selector groups until the end of [source] and
+   * returns them in source order.
+   *
+   * If [throwErrors] is true, parse errors are thrown as CssParserException
+   * in addition to being printed.
+   */
+  List<SelectorGroup> expression([bool throwErrors = false]) {
nweiz 2011/11/10 00:04:39 I think your terminology is confusing here. "expre
terry 2011/11/16 14:00:22 I see your point, expression was probably a bad na
+    this._errorsAsException = throwErrors;
+    List<SelectorGroup> groups = [];
+    while (!_maybeEat(TokenKind.END_OF_FILE)) {
+      do {
+        int start = _peekToken.start;
+        groups.add(new SelectorGroup(selectorExpression(), start));
+
+        // selectorExpression() returns without consuming anything when the
+        // lookahead cannot start a selector. A comma or end of file is
+        // consumed by the loop conditions below; any other token would be
+        // looked at forever, so report it and skip over it.
+        if (_peekToken.start == start &&
+            !_peekKind(TokenKind.COMMA) &&
+            !_peekKind(TokenKind.END_OF_FILE)) {
+          _errorExpected('selector');
+        }
+      } while (_maybeEat(TokenKind.COMMA));
+    }
+
+    return groups;
+  }
+
+  /**
+   * Parses a template: a single @{selectors} and nothing else. Returns the
+   * parsed SelectorGroup, or null when [source] is empty or the template is
+   * malformed.
+   *
+   * If [throwErrors] is true, parse errors are thrown as CssParserException
+   * in addition to being printed.
+   */
+  SelectorGroup template([bool throwErrors = false]) {
+    this._errorsAsException = throwErrors;
+    SelectorGroup selectorGroup;
+    if (!_maybeEat(TokenKind.END_OF_FILE)) {
+      selectorGroup = templateExpression();
+      // Only a successfully parsed template is checked for trailing input; a
+      // malformed one has already been reported by templateExpression().
+      if (selectorGroup != null) {
nweiz 2011/11/10 00:04:39 Can you just call isPrematureEndOfFile?
terry 2011/11/22 16:40:47 Done.
+        checkEndOfFile();
+      }
+    }
+
+    return selectorGroup;
+  }
+
+  /**
+   * Expect @{css_expression}
+   *
+   * Returns the SelectorGroup for the selectors between the braces. When the
+   * '@', the '{' or the closing '}' is missing, an error is reported through
+   * _errorExpected and no value is returned.
+   */
+  templateExpression() {
+    SelectorGroup parsed = null;
+    int start = _peekToken.start;
+
+    if (_maybeEat(TokenKind.AT) && _maybeEat(TokenKind.LBRACE)) {
nweiz 2011/11/10 00:04:39 Is there any reason to do all these if checks, rat
terry 2011/11/16 14:00:22 Main reason was more exact error. However, I thin
+      List<SimpleSelector> selectors = selectorExpression();
+      SelectorGroup candidate = new SelectorGroup(selectors, start);
+
+      // The group only counts once the closing brace has been consumed.
+      if (_maybeEat(TokenKind.RBRACE)) {
+        parsed = candidate;
+      }
+    }
+
+    if (parsed != null) {
+      return parsed;
+    }
+    _errorExpected('css template @{css selector}');
+  }
+
+ /**
+ * Accounts for one class selector in the running [matches] tally and returns
+ * the tally incremented by one.
+ *
+ * Throws CssSelectorException if [selector] carries a combinator, or if
+ * [matches] is negative (elementIdCheck drives the tally negative, so a
+ * negative value means an Id selector was already counted).
+ */
+ int classNameCheck(var selector, int matches) {
+ if (selector.isCombinatorNone()) {
+ if (matches < 0) {
+ String tooMany = selector.toString();
+ throw new CssSelectorException(
+ 'Can not mix Id selector with class selector(s). Id ' +
nweiz 2011/11/10 00:04:39 This isn't actually true. "#foo.bar" is a valid se
terry 2011/11/16 14:00:22 Of course, however this is used by the validator f
nweiz 2011/11/23 00:59:44 Why aren't we allowing arbitrary selectors in sele
+ 'selector must be singleton too many starting at $tooMany',
+ selector.span);
+ }
+
+ return ++matches;
nweiz 2011/11/10 00:04:39 Style nit: matches + 1, since you aren't actually
terry 2011/11/22 16:40:47 Done.
+ } else {
+ String error = selector.toString();
+ throw new CssSelectorException(
+ 'Selectors can not have combinators (>, +, or ~) before $error',
+ selector.span);
+ }
+ }
+
+ /**
+ * Accounts for one Id selector in the running [matches] tally and returns
+ * the tally decremented by one.
+ *
+ * Throws CssSelectorException if [selector] carries a combinator, or if
+ * [matches] is not zero, i.e. some class or Id selector was already counted.
+ */
+ int elementIdCheck(var selector, int matches) {
+ if (selector.isCombinatorNone()) {
+ if (matches != 0) {
+ String tooMany = selector.toString();
+ throw new CssSelectorException(
+ 'Use of Id selector must be singleton starting at $tooMany',
+ selector.span);
+ }
+ return --matches;
nweiz 2011/11/10 00:04:39 Style nit: matches - 1
terry 2011/11/22 16:40:47 Done.
+ } else {
+ String error = selector.toString();
+ throw new CssSelectorException(
+ 'Selectors can not have combinators (>, +, or ~) before $error',
+ selector.span);
+ }
+ }
+
+  /**
+   * Validate the @{css expression}: only .class and #elementId are valid
+   * inside of @{...}. Every name must be listed in [cssWorld] (classes or
+   * ids respectively) unless it starts with an underscore, which marks it as
+   * private.
+   *
+   * Throws CssSelectorException for a selector of any other type, for a
+   * selector rejected by classNameCheck/elementIdCheck, and for the first
+   * selector whose name is unknown to [cssWorld].
+   */
+  validateTemplate(List<Node> selectors, CssWorld cssWorld) {
nweiz 2011/11/10 00:04:39 Does this really belong in the parser? It's not ac
terry 2011/11/22 16:40:47 Will be moving this out in the next checkin. On 2
+    var errorSelector;
+    bool found = false;
+
+    int matches = 0; // < 0 IdSelectors, > 0 ClassSelector
nweiz 2011/11/10 00:04:39 You're declaring an int as a bool here. Also, it s
terry 2011/11/22 16:40:47 Thanks, caught this earlier when I merged with the
+    for (var selector in selectors) {
+      found = false;
+      if (selector is ClassSelector) {
+        // Any class name starting with an underscore is a private class name
+        // that doesn't have to match the world of known classes.
+        if (!selector.name.startsWith('_')) {
+          for (var className in cssWorld.classes) {
nweiz 2011/11/10 00:04:39 Perhaps cssWorld.classes should be a hash table so
terry 2011/11/22 16:40:47 You're probably right but I've err'd on the side o
+            if (selector.name == className) {
+              matches = classNameCheck(selector, matches);
+              found = true;
+              break;
nweiz 2011/11/10 00:04:39 If you break after you find a single valid selecto
terry 2011/11/22 16:40:47 No each selector is validated if any selector isn'
+            }
+          }
+        } else {
+          // Don't check any class name that is prefixed with an underscore.
+          // However, signal as found and bump up matches; it's a valid class
+          // name.
+          matches = classNameCheck(selector, matches);
+          found = true;
+        }
+      } else if (selector is IdSelector) {
+        // Any element id starting with an underscore is a private element id
+        // that doesn't have to match the world of known element ids.
+        if (!selector.name.startsWith('_')) {
+          for (var id in cssWorld.ids) {
+            if (selector.name == id) {
+              matches = elementIdCheck(selector, matches);
+              found = true;
+              break;
+            }
+          }
+        } else {
+          // Don't check any element ID that is prefixed with an underscore.
+          // However, signal as found and bump up matches; it's a valid element
+          // ID.
+          matches = elementIdCheck(selector, matches);
+          found = true;
+        }
+      } else {
+        String badSelector = selector.toString();
+        throw new CssSelectorException(
+            'Invalid selector $badSelector', selector.span);
nweiz 2011/11/10 00:04:39 This is a confusing error message; it implies that
terry 2011/11/22 16:40:47 Good point I'll change the error. Done.
+      }
+
+      if (!found) {
+        errorSelector = selector; // Flag the problem selector.
nweiz 2011/11/10 00:04:39 If this is the only place you set errorSelector, w
terry 2011/11/22 16:40:47 Done.
+        break;
+      }
+    }
+
+    // Either only class selectors were counted, or exactly one Id selector.
+    assert(matches >= 0 || matches == -1);
+
+    if (!found && errorSelector != null) {
+      String unknownName = errorSelector.toString();
+      throw new CssSelectorException('Unknown selector name $unknownName',
+          errorSelector.span);
+    }
+  }
+
+ ///////////////////////////////////////////////////////////////////
+ // Productions
+ ///////////////////////////////////////////////////////////////////
+
+ /**
+ * Parses a selector expression. Currently a plain pass-through to
+ * simpleSelectorSequence(), whose result is returned unchanged.
+ */
+ selectorExpression() {
nweiz 2011/11/10 00:04:39 This method doesn't seem to do anything.
terry 2011/11/22 16:40:47 It's gone. On 2011/11/10 00:04:39, nweiz wrote:
+ return simpleSelectorSequence();
+ }
+
+  /**
+   * Collects the selectors produced by repeated calls to combinator() until
+   * it returns null, and returns them as a (possibly empty) list.
+   */
+  simpleSelectorSequence() {
nweiz 2011/11/10 00:04:39 selector()
terry 2011/11/22 16:40:47 Done.
+    List<SimpleSelector> sequence = [];
+    for (var item = combinator(); item != null; item = combinator()) {
+      sequence.add(item);
+    }
+
+    return sequence;
+  }
+
+  /**
+   * Consumes an optional leading combinator token (plus, greater or tilde)
+   * and parses the selector that follows it. The combinator's token kind, or
+   * COMBINATOR_NONE when there is none, is handed to
+   * namespaceElementUniversal(), whose result is returned.
+   */
+  combinator() {
nweiz 2011/11/10 00:04:39 simpleSelectorSequence()
terry 2011/11/22 16:40:47 Done.
+    int combinatorType = TokenKind.COMBINATOR_NONE;
+    int kind = _peek();
+    if (kind == TokenKind.COMBINATOR_PLUS ||
+        kind == TokenKind.COMBINATOR_GREATER ||
+        kind == TokenKind.COMBINATOR_TILDE) {
+      _eat(kind);
+      combinatorType = kind;
+    }
nweiz 2011/11/10 00:04:39 You never seem to be parsing a sequence of simple
terry 2011/11/22 16:40:47 I had't completed the code but have now so sequenc
+
+    return namespaceElementUniversal(combinatorType);
+  }
+
+  /**
+   * Simple selector grammar:
+   *    simple_selector_sequence
+   *       : [ type_selector | universal ]
+   *         [ HASH | class | attrib | pseudo | negation ]*
+   *       | [ HASH | class | attrib | pseudo | negation ]+
+   *    type_selector
+   *       : [ namespace_prefix ]? element_name
+   *    namespace_prefix
+   *       : [ IDENT | '*' ]? '|'
+   *    element_name
+   *       : IDENT
+   *    universal
+   *       : [ namespace_prefix ]? '*'
+   *    class
+   *       : '.' IDENT
+   *
+   * Parses the type_selector / universal part. [combinator] is the TokenKind
+   * combinator constant picked by combinator() and is stored on the selector
+   * that is created. When the next token is neither '*' nor an identifier,
+   * parsing is delegated to selectorNameType().
+   */
+  namespaceElementUniversal(int combinator) {
nweiz 2011/11/10 00:04:39 simpleSelector()
terry 2011/11/22 16:40:47 Done.
+    // Holds either the '*' marker string or the Identifier that was parsed.
+    var first;
+    switch (_peek()) {
+      case TokenKind.ASTERISK:
+        first = '*'; // Mark as universal namespace.
+        _next();
+        break;
+      case TokenKind.IDENTIFIER:
+        first = identifier();
+        break;
+    }
+
+    if (first != null) {
nweiz 2011/11/10 00:04:39 Would be cleaner to return immediately if first ==
terry 2011/11/22 16:40:47 Done.
+      // Could be a namespace?
+      var isNamespace = _maybeEat(TokenKind.NAMESPACE);
+      if (isNamespace) {
+        var element;
+        switch (_peek()) {
nweiz 2011/11/10 00:04:39 I don't like this duplicated logic. Could you chec
terry 2011/11/22 16:40:47 For now I like the element vs/ namespace parsing I
+          case TokenKind.ASTERISK:
+            element = '*'; // Mark as universal.
+            _next();
+            break;
+          case TokenKind.IDENTIFIER:
+            element = identifier();
+            break;
+          default:
+            _error('expected element name or universal, but found $_peekToken',
+                _peekToken.span);
jimhug 2011/11/09 16:04:03 Nit: too long line.
terry 2011/11/09 21:56:06 Done.
+        }
+
+        return new NamespaceSelector(first, new ElementSelector(element),
+            combinator);
+      } else {
+        return new ElementSelector(first, combinator);
+      }
+    } else {
+      // Check for HASH | class | attrib | pseudo | negation
+      return selectorNameType(combinator);
+    }
+  }
+
+  /**
+   * Parses the HASH | class | pseudo part of a simple selector and returns
+   * the matching IdSelector, ClassSelector, PseudoClassSelector or
+   * PseudoElementSelector. [combinator] is the TokenKind combinator constant
+   * stored on the selector that is created.
+   *
+   * When the next token starts none of these, nothing is consumed and no
+   * value is returned.
+   */
+  selectorNameType(int combinator) {
nweiz 2011/11/10 00:04:39 simpleSelectorTail()?
terry 2011/11/22 16:40:47 Done.
+    // Check for HASH | class | attrib | pseudo | negation
+    switch (_peek()) {
+      case TokenKind.HASH:
nweiz 2011/11/10 00:04:39 Unused variable.
terry 2011/11/22 16:40:47 Done.
+        _eat(TokenKind.HASH);
+        var name = identifier();
+        return new IdSelector(name, combinator);
+      case TokenKind.DOT:
+        _eat(TokenKind.DOT);
+        var name = identifier();
+        return new ClassSelector(name, combinator);
+      case TokenKind.PSEUDO:
+        // :pseudo-class ::pseudo-element
+        _eat(TokenKind.PSEUDO);
+        // A second ':' marks a pseudo-element. It has to be consumed here,
+        // otherwise identifier() would be handed the ':' instead of the name.
+        bool pseudoClass = !_maybeEat(TokenKind.PSEUDO);
nweiz 2011/11/10 00:04:39 Seems like parsing : vs :: is a job for the tokeni
terry 2011/11/22 16:40:47 Your right. I'll fix in the next checkin.
+        var name = identifier();
+        // TODO(terry): Need to handle specific pseudo class/element name and
+        // backward compatible names that are : as well as ::.
+        return pseudoClass ?
+            new PseudoClassSelector(name, combinator) :
+            new PseudoElementSelector(name, combinator);
nweiz 2011/11/10 00:04:39 TODO: Some pseudo-class and -element selectors hav
terry 2011/11/22 16:40:47 Yep, I haven't finished this yet I'll add a TODO.
+
+      // TODO(terry): attrib, negation.
+    }
+  }
+
+  /**
+   * Consumes the next token and returns an Identifier built from its text
+   * and span. If the token is not a legal identifier an error is reported
+   * first; the Identifier is still returned unless that error is thrown.
+   */
+  identifier() {
+    var tok = _next();
+    if (!TokenKind.isIdentifier(tok.kind)) {
nweiz 2011/11/10 00:04:39 Why aren't you just using _eat here?
terry 2011/11/22 16:40:47 Why for better error recovery? Currently, error r
+      // Only building the message is guarded (as in _errorExpected). Wrapping
+      // the _error call itself would swallow the CssParserException it may
+      // throw and report the same problem a second time.
+      var message;
+      try {
+        message = 'expected identifier, but found $tok';
+      } catch (var e) {
+        message = 'expected identifier';
+      }
+      _error(message, tok.span);
+    }
+
+    return new Identifier(tok.text, _makeSpan(tok.start));
+  }
+}

Powered by Google App Engine
This is Rietveld 408576698