frog/css/parser_css.dart - Issue 8498020: Beginning of CSS parser using frog parsing infrastructure.

Unified Diff: frog/css/parser_css.dart

Issue 8498020: Beginning of CSS parser using frog parsing infrastructure. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 9 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: frog/css/parser_css.dart

diff --git a/frog/css/parser_css.dart b/frog/css/parser_css.dart

new file mode 100644

index 0000000000000000000000000000000000000000..6249aff081a1ac40098d208ac2d533b420f46540

--- /dev/null

+++ b/frog/css/parser_css.dart

@@ -0,0 +1,427 @@

+// BSD-style license that can be found in the LICENSE file.

+/**

+ * A simple recursive descent parser for the dart language.

jimhug 2011/11/09 16:04:03 Old comment.

terry 2011/11/09 21:56:06 Done.

+ */

+class Parser {

+ // If true throw CSSParserExceptions for any tokenizing/parsing problems.

+ bool _errorsAsException;

+ Tokenizer tokenizer;

+ final SourceFile source;

+ // TODO(jimhug): 1. Try to kill initializers, if fails, clean this up.

+ bool _inInitializers;

nweiz 2011/11/10 00:04:39 Unused variable.

terry 2011/11/16 14:00:22 Done.

+ Token _previousToken;

+ Token _peekToken;

+ /**

+ * Creates a parser over [source] and primes the one-token lookahead.

+ * Tokenizing begins at [startOffset], which defaults to 0.

+ */

+ Parser(this.source, [int startOffset = 0]) {

+ // NOTE(review): the meaning of the literal `true` passed to Tokenizer is

+ // not visible in this file -- confirm against Tokenizer's constructor.

+ tokenizer = new Tokenizer(source, true, startOffset);

+ // Fetch the first token so _peek()/_peekKind() work immediately.

+ _peekToken = tokenizer.next();

+ // No token has been consumed yet.

+ _previousToken = null;

+ _inInitializers = false;

+ // Errors are only printed by default; expression() and template()

+ // overwrite this flag from their throwErrors argument.

+ _errorsAsException = false;

+ }

+ /** Generates an error if [source] has not been completely consumed. */

+ void checkEndOfFile() {

+ // _eat reports the problem through _errorExpected when the lookahead

+ // token is not END_OF_FILE.

+ _eat(TokenKind.END_OF_FILE);

+ }

+ /** Guard to break out of parser when an unexpected end of file is found. */

+ // TODO(jimhug): Failure to call this method can lead to infinite parser

+ // loops. Consider embracing exceptions for more errors to reduce

+ // the danger here.

+ bool isPrematureEndOfFile() {

+ if (_maybeEat(TokenKind.END_OF_FILE)) {

+ _error('unexpected end of file', _peekToken.span);

+ return true;

+ } else {

+ return false;

+ }

+ ///////////////////////////////////////////////////////////////////

+ // Basic support methods

+ ///////////////////////////////////////////////////////////////////

+ /** Returns the kind of the lookahead token without consuming it. */

+ int _peek() {

+ return _peekToken.kind;

+ }

+ /**

+ * Consumes the lookahead token and returns it. The consumed token is

+ * recorded as [_previousToken] and a new lookahead is read from the

+ * tokenizer.

+ */

+ Token _next() {

+ _previousToken = _peekToken;

+ _peekToken = tokenizer.next();

+ return _previousToken;

+ }

+ /** Returns true if the lookahead token is of [kind]; consumes nothing. */

+ bool _peekKind(int kind) {

+ return _peekToken.kind == kind;

+ }

+ /* Is the next token a legal identifier? This includes pseudo-keywords. */

+ bool _peekIdentifier() {

+ return TokenKind.isIdentifier(_peekToken.kind);

nweiz 2011/11/10 00:04:39 Why can't you use _peekKind(TokenKind.IDENTIFIER)?

terry 2011/11/16 14:00:22 This is mimicked after frog's parser. On 2011/11/

nweiz 2011/11/23 00:59:44 Frog's parser needs it because there are multiple

+ }

+ bool _maybeEat(int kind) {

+ if (_peekToken.kind == kind) {

+ _previousToken = _peekToken;

+ _peekToken = tokenizer.next();

+ return true;

+ } else {

+ return false;

+ }

+ void _eat(int kind) {

+ if (!_maybeEat(kind)) {

+ _errorExpected(TokenKind.kindToString(kind));

+ }

+ void _eatSemicolon() {

nweiz 2011/11/10 00:04:39 Unused method.

terry 2011/11/16 14:00:22 Unused now will use later. On 2011/11/10 00:04:39

nweiz 2011/11/23 00:59:44 I don't think it'll be useful, actually... there a

+ _eat(TokenKind.SEMICOLON);

+ }

+ /**

+ * Reports that [expected] was required but a different token was found.

+ * The offending token is consumed (via [_next]) and the error is reported

+ * at that token's span.

+ */

+ void _errorExpected(String expected) {

+ // Consuming the bad token means the parser advances past it.

+ var tok = _next();

+ var message;

+ // NOTE(review): presumably guards against the token's string conversion

+ // throwing during interpolation -- confirm; falls back to a message

+ // that omits the token.

+ try {

+ message = 'expected $expected, but found $tok';

+ } catch (var e) {

+ message = 'parsing error expected $expected';

+ }

+ _error(message, tok.span);

+ }

+ /**

+ * Reports a parse error by printing "FATAL: ..." and, when

+ * [_errorsAsException] is set, throwing a CssParserException carrying

+ * [message] and [location]. If [location] is omitted or null, the span of

+ * the lookahead token is used.

+ */

+ void _error(String message, [SourceSpan location=null]) {

+ if (location === null) {

+ location = _peekToken.span;

+ }

+ // TODO(terry): Should use below world.fatal:

+ // world.fatal(message, location); // syntax errors are fatal for now

+ // TODO(terry): Beginning of temp code until world.fatal enabled.

+ var text = message;

+ // location can still be null here if the lookahead token has no span;

+ // in that case the bare message is printed.

+ if (location != null) {

+ text = location.toMessageString(message);

+ }

+ print("FATAL: $text");

+ // Any parsing problem throw exception too.

+ if (this._errorsAsException) {

+ throw new CssParserException(message, location);

+ }

+ // TODO(terry): End of temp code until world.fatal enabled.

+ }

+ /**

+ * Builds a SourceSpan over [source] running from offset [start] to the end

+ * of the most recently consumed token.

+ */

+ SourceSpan _makeSpan(int start) {

+ // NOTE(review): _previousToken is null until a token has been consumed,

+ // so this presumably must only be called after _next()/_maybeEat()

+ // succeeded at least once -- confirm callers.

+ return new SourceSpan(source, start, _previousToken.end);

+ }

+ ///////////////////////////////////////////////////////////////////

+ // Top level productions

+ ///////////////////////////////////////////////////////////////////

+ List<SelectorGroup> expression([bool throwErrors = false]) {

nweiz 2011/11/10 00:04:39 I think your terminology is confusing here. "expre

terry 2011/11/16 14:00:22 I see your point, expression was probably a bad na

+ this._errorsAsException = throwErrors;

+ List<SelectorGroup> groups = [];

+ while (!_maybeEat(TokenKind.END_OF_FILE)) {

+ do {

+ int start = _peekToken.start;

+ groups.add(new SelectorGroup(selectorExpression(), start));

+ } while (_maybeEat(TokenKind.COMMA));

+ }

+ return groups;

+ }

+ // Templates are @{selectors} single line nothing else.

+ SelectorGroup template([bool throwErrors = false]) {

+ this._errorsAsException = throwErrors;

+ SelectorGroup selectorGroup;

+ if (!_maybeEat(TokenKind.END_OF_FILE)) {

+ selectorGroup = templateExpression();

+ if (!_maybeEat(TokenKind.END_OF_FILE)) {

nweiz 2011/11/10 00:04:39 Can you just call isPrematureEndOfFile?

terry 2011/11/22 16:40:47 Done.

+ // TODO(terry): Error should be done.

+ }

+ return selectorGroup;

+ }

+ /*

+ * Expect @{css_expression}

+ */

+ templateExpression() {

+ SelectorGroup selectorGroup = null;

+ int start = _peekToken.start;

+ if (_peek() == TokenKind.AT) {

nweiz 2011/11/10 00:04:39 Is there any reason to do all these if checks, rat

terry 2011/11/16 14:00:22 Main reason was more exact error. However, I thin

+ _eat(TokenKind.AT);

+ if (_peek() == TokenKind.LBRACE) {

+ _eat(TokenKind.LBRACE);

+ List<SimpleSelector> selectors= selectorExpression();

+ SelectorGroup exprResult = new SelectorGroup(selectors, start);

+ if (_peek() == TokenKind.RBRACE) {

+ _eat(TokenKind.RBRACE);

+ selectorGroup = exprResult;

+ }

+ if (selectorGroup == null) {

+ _errorExpected('css template @{css selector}');

+ } else {

+ return selectorGroup;

+ }

+ int classNameCheck(var selector, int matches) {

+ if (selector.isCombinatorNone()) {

+ if (matches < 0) {

+ String tooMany = selector.toString();

+ throw new CssSelectorException(

+ 'Can not mix Id selector with class selector(s). Id ' +

nweiz 2011/11/10 00:04:39 This isn't actually true. "#foo.bar" is a valid se

terry 2011/11/16 14:00:22 Of course, however this is used by the validator f

nweiz 2011/11/23 00:59:44 Why aren't we allowing arbitrary selectors in sele

+ 'selector must be singleton too many starting at $tooMany',

+ selector.span);

+ }

+ return ++matches;

nweiz 2011/11/10 00:04:39 Style nit: matches + 1, since you aren't actually

terry 2011/11/22 16:40:47 Done.

+ } else {

+ String error = selector.toString();

+ throw new CssSelectorException(

+ 'Selectors can not have combinators (>, +, or ~) before $error',

+ selector.span);

+ }

+ int elementIdCheck(var selector, int matches) {

+ if (selector.isCombinatorNone()) {

+ if (matches != 0) {

+ String tooMany = selector.toString();

+ throw new CssSelectorException(

+ 'Use of Id selector must be singleton starting at $tooMany',

+ selector.span);

+ }

+ return --matches;

nweiz 2011/11/10 00:04:39 Style nit: matches - 1

terry 2011/11/22 16:40:47 Done.

+ } else {

+ String error = selector.toString();

+ throw new CssSelectorException(

+ 'Selectors can not have combinators (>, +, or ~) before $error',

+ selector.span);

+ }

+ // Validate the @{css expression} only .class and #elementId are valid inside

+ // of @{...}.

+ validateTemplate(List<Node> selectors, CssWorld cssWorld) {

nweiz 2011/11/10 00:04:39 Does this really belong in the parser? It's not ac

terry 2011/11/22 16:40:47 Will be moving this out in the next checkin. On 2

+ var errorSelector;

+ bool found = false;

+ bool matches = 0; // < 0 IdSelectors, > 0 ClassSelector

nweiz 2011/11/10 00:04:39 You're declaring an int as a bool here. Also, it s

terry 2011/11/22 16:40:47 Thanks, caught this earlier when I merged with the

+ for (selector in selectors) {

+ found = false;

+ if (selector is ClassSelector) {

+ // Any class name starting with an underscore is a private class name

+ // that doesn't have to match the world of known classes.

+ if (!selector.name.startsWith('_')) {

+ for (className in cssWorld.classes) {

nweiz 2011/11/10 00:04:39 Perhaps cssWorld.classes should be a hash table so

terry 2011/11/22 16:40:47 You're probably right but I've err'd on the side o

+ if (selector.name == className) {

+ matches = classNameCheck(selector, matches);

+ found = true;

+ break;

nweiz 2011/11/10 00:04:39 If you break after you find a single valid selecto

terry 2011/11/22 16:40:47 No each selector is validated if any selector isn'

+ }

+ } else {

+ // Don't check any class name that is prefixed with an underscore.

+ // However, signal as found and bump up matches; it's a valid class

+ // name.

+ matches = classNameCheck(selector, matches);

+ found = true;

+ }

+ } else if (selector is IdSelector) {

+ // Any element id starting with an underscore is a private element id

+ // that doesn't have to match the world of known element ids.

+ if (!selector.name.startsWith('_')) {

+ for (id in cssWorld.ids) {

+ if (selector.name == id) {

+ matches = elementIdCheck(selector, matches);

+ found = true;

+ break;

+ }

+ } else {

+ // Don't check any element ID that is prefixed with an underscore.

+ // However, signal as found and bump up matches; it's a valid element

+ // ID.

+ matches = elementIdCheck(selector, matches);

+ found = true;

+ }

+ } else {

+ String badSelector = selector.toString();

+ throw new CssSelectorException(

+ 'Invalid selector $badSelector', selector.span);

nweiz 2011/11/10 00:04:39 This is a confusing error message; it implies that

terry 2011/11/22 16:40:47 Good point I'll change the error. Done.

+ }

+ if (!found) {

+ errorSelector = selector; // Flag the problem selector.

nweiz 2011/11/10 00:04:39 If this is the only place you set errorSelector, w

terry 2011/11/22 16:40:47 Done.

+ break;

+ }

+ assert(matches >= 0 || matches == -1);

+ if (!found && errorSelector != null) {

+ String unknownName = errorSelector.toString();

+ throw new CssSelectorException('Unknown selector name $unknownName',

+ errorSelector.span);

+ }

+ ///////////////////////////////////////////////////////////////////

+ // Productions

+ ///////////////////////////////////////////////////////////////////

+ selectorExpression() {

nweiz 2011/11/10 00:04:39 This method doesn't seem to do anything.

terry 2011/11/22 16:40:47 It's gone. On 2011/11/10 00:04:39, nweiz wrote:

+ return simpleSelectorSequence();

+ }

+ simpleSelectorSequence() {

nweiz 2011/11/10 00:04:39 selector()

terry 2011/11/22 16:40:47 Done.

+ List<SimpleSelector> simpleSelectors = [];

+ while (true) {

+ var selectorItem = combinator();

+ if (selectorItem != null) {

+ simpleSelectors.add(selectorItem);

+ } else {

+ break;

+ }

+ return simpleSelectors;

+ }

+ combinator() {

nweiz 2011/11/10 00:04:39 simpleSelectorSequence()

terry 2011/11/22 16:40:47 Done.

+ int combinatorType = TokenKind.COMBINATOR_NONE;

+ switch (_peek()) {

+ case TokenKind.COMBINATOR_PLUS:

+ _eat(TokenKind.COMBINATOR_PLUS);

+ combinatorType = TokenKind.COMBINATOR_PLUS;

+ break;

+ case TokenKind.COMBINATOR_GREATER:

+ _eat(TokenKind.COMBINATOR_GREATER);

+ combinatorType = TokenKind.COMBINATOR_GREATER;

+ break;

+ case TokenKind.COMBINATOR_TILDE:

+ _eat(TokenKind.COMBINATOR_TILDE);

+ combinatorType = TokenKind.COMBINATOR_TILDE;

+ break;

+ }

nweiz 2011/11/10 00:04:39 You never seem to be parsing a sequence of simple

terry 2011/11/22 16:40:47 I hadn't completed the code but have now so sequenc

+ return namespaceElementUniversal(combinatorType);

+ }

+ /**

+ * Simple selector grammar:

+ * simple_selector_sequence

+ * : [ type_selector | universal ]

+ * [ HASH | class | attrib | pseudo | negation ]*

+ * type_selector

+ * : [ namespace_prefix ]? element_name

+ * namespace_prefix

+ * : [ IDENT | '*' ]? '|'

+ * element_name

+ * : IDENT

+ * universal

+ * : [ namespace_prefix ]? '*'

+ * class

+ * : '.' IDENT

+ */

+ namespaceElementUniversal(Token combinator) {

nweiz 2011/11/10 00:04:39 simpleSelector()

terry 2011/11/22 16:40:47 Done.

+ String first;

+ switch (_peek()) {

+ case TokenKind.ASTERISK:

+ first = '*'; // Mark as universal namespace.

+ _next();

+ break;

+ case TokenKind.IDENTIFIER:

+ int startIdent = _peekToken.start;

+ first = identifier();

+ break;

+ }

+ if (first != null) {

nweiz 2011/11/10 00:04:39 Would be cleaner to return immediately if first ==

terry 2011/11/22 16:40:47 Done.

+ // Could be a namespace?

+ var isNamespace = _maybeEat(TokenKind.NAMESPACE);

+ if (isNamespace) {

+ var element;

+ switch (_peek()) {

nweiz 2011/11/10 00:04:39 I don't like this duplicated logic. Could you chec

terry 2011/11/22 16:40:47 For now I like the element vs/ namespace parsing I

+ case TokenKind.ASTERISK:

+ element = '*'; // Mark as universal.

+ _next();

+ break;

+ case TokenKind.IDENTIFIER:

+ int startIdent = _peekToken.start;

+ element = identifier();

+ break;

+ default:

+ _error('expected element name or universal, but found $_peekToken', _peekToken.span);

jimhug 2011/11/09 16:04:03 Nit: too long line.

terry 2011/11/09 21:56:06 Done.

+ }

+ return new NamespaceSelector(first, new ElementSelector(element),

+ combinator);

+ } else {

+ return new ElementSelector(first, combinator);

+ }

+ } else {

+ // Check for HASH | class | attrib | pseudo | negation

+ return selectorNameType(combinator);

+ }

+ selectorNameType(Token combinator) {

nweiz 2011/11/10 00:04:39 simpleSelectorTail()?

terry 2011/11/22 16:40:47 Done.

+ // Check for HASH | class | attrib | pseudo | negation

+ switch (_peek()) {

+ case TokenKind.HASH:

+ int startHash = _peekToken.start;

nweiz 2011/11/10 00:04:39 Unused variable.

terry 2011/11/22 16:40:47 Done.

+ _eat(TokenKind.HASH);

+ var name = identifier();

+ return new IdSelector(name, combinator);

+ case TokenKind.DOT:

+ _eat(TokenKind.DOT);

+ var name = identifier();

+ return new ClassSelector(name, combinator);

+ case TokenKind.PSEUDO:

+ // :pseudo-class ::pseudo-element

+ _eat(TokenKind.PSEUDO);

+ bool pseudoClass = _peek() != TokenKind.PSEUDO;

nweiz 2011/11/10 00:04:39 Seems like parsing : vs :: is a job for the tokeni

terry 2011/11/22 16:40:47 You're right. I'll fix in the next checkin.

+ var name = identifier();

+ // TODO(terry): Need to handle specific pseudo class/element name and

+ // backward compatible names that are : as well as ::.

+ return pseudoClass ?

+ new PseudoClassSelector(name, combinator) :

+ new PseudoElementSelector(name, combinator);

nweiz 2011/11/10 00:04:39 TODO: Some pseudo-class and -element selectors hav

terry 2011/11/22 16:40:47 Yep, I haven't finished this yet I'll add a TODO.

+ // TODO(terry): attrib, negation.

+ }

+ identifier() {

+ var tok = _next();

+ if (!TokenKind.isIdentifier(tok.kind)) {

nweiz 2011/11/10 00:04:39 Why aren't you just using _eat here?

terry 2011/11/22 16:40:47 Why for better error recovery? Currently, error r

+ try {

+ _error('expected identifier, but found $tok', tok.span);

+ } catch (var e) {

+ _error('expected identifier', tok.span);

+ }

+ return new Identifier(tok.text, _makeSpan(tok.start));

+ }

« frog/css/csstemplate.dart ('K') | « frog/css/cssworld.dart ('k') | frog/css/test.dart » ('j') | frog/css/test.dart » ('J')