utils/css/parser.dart - Issue 8937017: New CSS parser written in Dart to replace pyparser

Side by Side Diff: utils/css/parser.dart

Issue 8937017: New CSS parser written in Dart to replace pyparser (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Put back for DartC Created 9 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.
nweiz 2012/01/04 19:05:41 Typo?
4	3

5 /**	4 /**

6 * A simple recursive descent parser for CSS.	5 * A simple recursive descent parser for CSS.

7 */	6 */

8 class Parser {	7 class Parser {

9 Tokenizer tokenizer;	8 Tokenizer tokenizer;

10	9

	10 var _fs; // If non-null filesystem to read files.

	11 String _basePath; // Base path of CSS file.

	12

11 final lang.SourceFile source;	13 final lang.SourceFile source;

12	14

13 lang.Token _previousToken;	15 lang.Token _previousToken;

14 lang.Token _peekToken;	16 lang.Token _peekToken;

15	17

16 Parser(this.source, [int startOffset = 0]) {	18 Parser(this.source, [int start = 0, this._fs = null, this._basePath = null]) {

17 tokenizer = new Tokenizer(source, true, startOffset);	19 tokenizer = new Tokenizer(source, true, start);

18 _peekToken = tokenizer.next();	20 _peekToken = tokenizer.next();

19 _previousToken = null;	21 _previousToken = null;

20 }	22 }

21	23

	24 // Main entry point for parsing an entire CSS file.

	25 Stylesheet parse() {

	26 List<lang.Node> productions = [];

	27

	28 int start = _peekToken.start;

	29 while (!_maybeEat(TokenKind.END_OF_FILE)) {

	30 // TODO(terry): Need to handle charset, import, media and page.
	nweiz 2012/01/04 19:05:41 I think you'll eventually want to parse unknown directives in order to be forwards-compatible with new directives that will inevitably be added to CSS, as well as browser-specific directives. You don't want to get into the position where you need to update this code every time any browser adds a new directive.
	31 var directive = processDirective();

	32 if (directive != null) {

	33 productions.add(directive);

	34 } else {

	35 productions.add(processRuleSet());

	36 }

	37 }

	38

	39 return new Stylesheet(productions, _makeSpan(start));

	40 }

	41

22 /** Generate an error if [source] has not been completely consumed. */	42 /** Generate an error if [source] has not been completely consumed. */

23 void checkEndOfFile() {	43 void checkEndOfFile() {

24 _eat(TokenKind.END_OF_FILE);	44 _eat(TokenKind.END_OF_FILE);

25 }	45 }

26	46

27 /** Guard to break out of parser when an unexpected end of file is found. */	47 /** Guard to break out of parser when an unexpected end of file is found. */

28 // TODO(jimhug): Failure to call this method can lead to inifinite parser	48 // TODO(jimhug): Failure to call this method can lead to inifinite parser

29 // loops. Consider embracing exceptions for more errors to reduce	49 // loops. Consider embracing exceptions for more errors to reduce

30 // the danger here.	50 // the danger here.

31 bool isPrematureEndOfFile() {	51 bool isPrematureEndOfFile() {

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
99 }	119 }

100	120

101 lang.SourceSpan _makeSpan(int start) {	121 lang.SourceSpan _makeSpan(int start) {

102 return new lang.SourceSpan(source, start, _previousToken.end);	122 return new lang.SourceSpan(source, start, _previousToken.end);

103 }	123 }

104	124

105 ///////////////////////////////////////////////////////////////////	125 ///////////////////////////////////////////////////////////////////

106 // Top level productions	126 // Top level productions

107 ///////////////////////////////////////////////////////////////////	127 ///////////////////////////////////////////////////////////////////

108	128

109 List<SelectorGroup> preprocess() {

110 List<SelectorGroup> groups = [];

111 while (!_maybeEat(TokenKind.END_OF_FILE)) {

112 do {

113 int start = _peekToken.start;

114 groups.add(new SelectorGroup(selector(),

115 _makeSpan(start)));

116 } while (_maybeEat(TokenKind.COMMA));

117 }

118

119 return groups;

120 }

121

122 // Templates are @{selectors} single line nothing else.	129 // Templates are @{selectors} single line nothing else.

123 SelectorGroup template() {	130 SelectorGroup parseTemplate() {
	nweiz 2012/01/04 19:05:41 Now that we're parsing things other than just selectors, maybe call this parseSelectorTemplate?
124 SelectorGroup selectorGroup = null;	131 SelectorGroup selectorGroup = null;

125 if (!isPrematureEndOfFile()) {	132 if (!isPrematureEndOfFile()) {

126 selectorGroup = templateExpression();	133 selectorGroup = templateExpression();

127 }	134 }

128	135

129 return selectorGroup;	136 return selectorGroup;

130 }	137 }

131	138

132 /*	139 /*

133 * Expect @{css_expression}	140 * Expect @{css_expression}

134 */	141 */

135 templateExpression() {	142 templateExpression() {

	143 List<Selector> selectors = [];

	144

136 int start = _peekToken.start;	145 int start = _peekToken.start;

137	146

138 _eat(TokenKind.AT);	147 _eat(TokenKind.AT);

139 _eat(TokenKind.LBRACE);	148 _eat(TokenKind.LBRACE);

140	149

141 SelectorGroup group = new SelectorGroup(selector(),	150 selectors.add(processSelector());

142 _makeSpan(start));	151 SelectorGroup group = new SelectorGroup(selectors, _makeSpan(start));
	nweiz 2012/01/04 19:05:41 Why not just new SelectorGroup([processSelector()], ...)?
143	152

144 _eat(TokenKind.RBRACE);	153 _eat(TokenKind.RBRACE);

145	154

146 return group;	155 return group;

147 }	156 }

148	157

149 int classNameCheck(var selector, int matches) {

150 if (selector.isCombinatorDescendant() \|\|

151 (selector.isCombinatorNone() && matches == 0)) {

152 if (matches < 0) {

153 String tooMany = selector.toString();

154 throw new CssSelectorException(

155 'Can not mix Id selector with class selector(s). Id ' +

156 'selector must be singleton too many starting at $tooMany');

157 }

158

159 return matches + 1;

160 } else {

161 String error = selector.toString();

162 throw new CssSelectorException(

163 'Selectors can not have combinators (>, +, or ~) before $error');

164 }

165 }

166

167 int elementIdCheck(var selector, int matches) {

168 if (selector.isCombinatorNone() && matches == 0) {

169 // Perfect just one element id returns matches of -1.

170 return -1;

171 } else if (selector.isCombinatorDescendant()) {

172 String tooMany = selector.toString();

173 throw new CssSelectorException(

174 'Use of Id selector must be singleton starting at $tooMany');

175 } else {

176 String error = selector.toString();

177 throw new CssSelectorException(

178 'Selectors can not have combinators (>, +, or ~) before $error');

179 }

180 }

181

182 // Validate the @{css expression} only .class and #elementId are valid inside

183 // of @{...}.

184 validateTemplate(List<lang.Node> selectors, CssWorld cssWorld) {

185 var errorSelector; // signal which selector didn't match.

186 bool found = false; // signal if a selector is matched.

187

188 int matches = 0; // < 0 IdSelectors, > 0 ClassSelector

189 for (selector in selectors) {

190 found = false;

191 if (selector is ClassSelector) {

192 // Any class name starting with an underscore is a private class name

193 // that doesn't have to match the world of known classes.

194 if (!selector.name.startsWith('_')) {

195 // TODO(terry): For now iterate through all classes look for faster

196 // mechanism hash map, etc.

197 for (className in cssWorld.classes) {

198 if (selector.name == className) {

199 matches = classNameCheck(selector, matches);

200 found = true; // .class found.

201 break;

202 }

203 }

204 } else {

205 // Don't check any class name that is prefixed with an underscore.

206 // However, signal as found and bump up matches; it's a valid class

207 // name.

208 matches = classNameCheck(selector, matches);

209 found = true; // ._class are always okay.

210 }

211 } else if (selector is IdSelector) {

212 // Any element id starting with an underscore is a private element id

213 // that doesn't have to match the world of known elemtn ids.

214 if (!selector.name.startsWith('_')) {

215 for (id in cssWorld.ids) {

216 if (selector.name == id) {

217 matches = elementIdCheck(selector, matches);

218 found = true; // #id found.

219 break;

220 }

221 }

222 } else {

223 // Don't check any element ID that is prefixed with an underscore.

224 // However, signal as found and bump up matches; it's a valid element

225 // ID.

226 matches = elementIdCheck(selector, matches);

227 found = true; // #_id are always okay

228 }

229 } else {

230 String badSelector = selector.toString();

231 throw new CssSelectorException(

232 'Invalid template selector $badSelector');

233 }

234

235 if (!found) {

236 String unknownName = selector.toString();

237 throw new CssSelectorException('Unknown selector name $unknownName');

238 }

239 }

240

241 // Every selector must match.

242 assert((matches >= 0 ? matches : -matches) == selectors.length);

243 }

244

245 ///////////////////////////////////////////////////////////////////	158 ///////////////////////////////////////////////////////////////////

246 // Productions	159 // Productions

247 ///////////////////////////////////////////////////////////////////	160 ///////////////////////////////////////////////////////////////////

248	161

249 selector() {	162 processMedia([bool oneRequired = false]) {
	nweiz 2012/01/04 19:05:41 Why are all the names prefixed with "process"? Seems redundant, as well as inconsistent with the style in frog/parser.
	nweiz 2012/01/04 19:05:41 This should be called processMediaQuery, since it only parses the query portion and not the full @media directive.
250 List<SimpleSelector> simpleSelectors = [];	163 List<String> media = [];

	164

	165 while (_peekIdentifier()) {
	nweiz 2012/01/04 19:05:41 Add a TODO here about supporting the full media query syntax (http://www.w3.org/TR/css3-mediaqueries/).
	166 // We have some media types.

	167 var medium = identifier(); // Medium ident.
	nweiz 2012/01/04 19:05:41 What does "medium" mean here?
	168 media.add(medium);

	169 if (!_maybeEat(TokenKind.COMMA)) {

	170 // No more media types exit now.

	171 break;

	172 }

	173 }

	174

	175 if (oneRequired && media.length == 0) {

	176 _error('at least one media type required', _peekToken.span);

	177 }

	178

	179 return media;

	180 }

	181

	182 // Directive grammar:

	183 //

	184 // import: '@import' [string \| URI] media_list?

	185 // media: '@media' media_list '{' ruleset '}'

	186 // page: '@page' [':' IDENT]? '{' declarations '}'

	187 // include: '@include' [string \| URI]

	188 // stylet: '@stylet' IDENT '{' ruleset '}'

	189 // media_list: IDENT [',' IDENT]

	190 // keyframes: '@-webkit-keyframes ...' (see grammar below).
	nweiz 2012/01/04 19:05:41 Also @-moz-keyframes, and probably also @keyframes for forwards-compatibility.
	191 // font_face: '@font-face' '{' declarations '}'

	192 //

	193 processDirective() {

	194 int start = _peekToken.start;

	195

	196 if (_maybeEat(TokenKind.AT)) {
	nweiz 2012/01/04 19:05:41 if (!_maybeEat(TokenKind.AT)) return;
	197 switch (_peek()) {

	198 case TokenKind.DIRECTIVE_IMPORT:

	199 _next();

	200

	201 String importStr;

	202 if (_peekIdentifier()) {

	203 var func = processFunction(identifier());

	204 if (func is UriTerm) {
	nweiz 2012/01/04 19:05:41 This needs better error handling.
	205 importStr = func.text;

	206 }

	207 } else {

	208 importStr = processQuotedString(false);

	209 }

	210

	211 // Any medias?
	nweiz 2012/01/04 19:05:41 Grammar nit: "media" is already plural.
	212 List<String> medias = processMedia();

	213

	214 if (importStr == null) {

	215 _error('missing import string', _peekToken.span);

	216 }

	217 return new ImportDirective(importStr, medias, _makeSpan(start));

	218 case TokenKind.DIRECTIVE_MEDIA:

	219 _next();

	220

	221 // Any medias?

	222 List<String> media = processMedia(true);

	223 RuleSet ruleset;

	224

	225 if (_maybeEat(TokenKind.LBRACE)) {

	226 ruleset = processRuleSet();
	nweiz 2012/01/04 19:05:41 @media directives can contain multiple rulesets.
	227 if (!_maybeEat(TokenKind.RBRACE)) {

	228 _error('expected } after ruleset for @media', _peekToken.span);

	229 }

	230 } else {

	231 _error('expected { after media before ruleset', _peekToken.span);

	232 }

	233 return new MediaDirective(media, ruleset, _makeSpan(start));

	234 case TokenKind.DIRECTIVE_PAGE:

	235 _next();

	236

	237 // Any pseudo page?

	238 var pseudoPage;

	239 if (_maybeEat(TokenKind.COLON)) {

	240 if (_peekIdentifier()) {
	nweiz 2012/01/04 19:05:41 What if there is no identifier after the colon?
	241 pseudoPage = identifier();

	242 }

	243 }

	244 return new PageDirective(pseudoPage, processDeclarations(),

	245 _makeSpan(start));

	246 case TokenKind.DIRECTIVE_KEYFRAMES:

	247 /* Key frames grammar:

	248 *

	249 * @-webkit-keyframes [IDENT\|STRING] '{' keyframes-blocks '}';

	250 *

	251 * keyframes-blocks:

	252 * [keyframe-selectors '{' declarations '}']* ;

	253 *

	254 * keyframe-selectors:

	255 * ['from'\|'to'\|PERCENTAGE] [',' ['from'\|'to'\|PERCENTAGE] ]* ;

	256 */

	257 _next();

	258

	259 var name;

	260 if (_peekIdentifier()) {
	nweiz 2012/01/04 19:05:41 Needs error handling.
	261 name = identifier();

	262 }

	263

	264 _eat(TokenKind.LBRACE);

	265

	266 KeyFrameDirective kf = new KeyFrameDirective(name, _makeSpan(start));

	267

	268 do {

	269 Expressions selectors = new Expressions(_makeSpan(start));

	270

	271 do {

	272 var term = processTerm();

	273

	274 // TODO(terry): Only allow from, to and PERCENTAGE ...

	275

	276 selectors.add(term);

	277 } while (_maybeEat(TokenKind.COMMA));

	278

	279 kf.add(new KeyFrameBlock(selectors, processDeclarations(),

	280 _makeSpan(start)));

	281

	282 } while (!_maybeEat(TokenKind.RBRACE));

	283

	284 return kf;

	285 case TokenKind.DIRECTIVE_FONTFACE:

	286 _next();

	287

	288 List<Declaration> decls = [];

	289

	290 // TODO(terry): To Be Implemented

	291

	292 return new FontFaceDirective(decls, _makeSpan(start));

	293 case TokenKind.DIRECTIVE_INCLUDE:

	294 _next();

	295 String filename = processQuotedString(false);

	296 if (_fs != null) {

	297 // Does CSS file exist?

	298 if (_fs.fileExists('${_basePath}${filename}')) {
	nweiz 2012/01/04 19:05:41 I really don't like resolving the @include in the parser. This seems like the job for a different part of the compilation process. Parsing should never invoke the filesystem.
	299 String basePath = "";

	300 int idx = filename.lastIndexOf('/');

	301 if (idx >= 0) {

	302 basePath = filename.substring(0, idx + 1);

	303 }

	304 basePath = '${_basePath}${basePath}';

	305 // Yes, let's parse this file as well.

	306 String fullFN = '${basePath}${filename}';

	307 String contents = _fs.readAll(fullFN);

	308 Parser parser = new Parser(new lang.SourceFile(fullFN, contents), 0,

	309 _fs, basePath);

	310 Stylesheet stylesheet = parser.parse();

	311 return new IncludeDirective(filename, stylesheet, _makeSpan(start));

	312 }

	313

	314 _error('file doesn\'t exist ${filename}', _peekToken.span);

	315 }

	316

	317 print("WARNING: @include doesn't work for uitest");

	318 return new IncludeDirective(filename, null, _makeSpan(start));

	319 case TokenKind.DIRECTIVE_STYLET:

	320 /* Stylet grammar:

	321 *

	322 * @stylet IDENT '{'

	323 * ruleset

	324 * '}'

	325 */

	326 _next();

	327

	328 var name;

	329 if (_peekIdentifier()) {
	nweiz 2012/01/04 19:05:41 Needs error handling.
	330 name = identifier();

	331 }

	332

	333 _eat(TokenKind.LBRACE);

	334

	335 List<lang.Node> productions = [];

	336

	337 int start = _peekToken.start;

	338 while (!_maybeEat(TokenKind.END_OF_FILE)) {

	339 RuleSet ruleset = processRuleSet();

	340 if (ruleset == null) {

	341 break;

	342 }

	343 productions.add(ruleset);

	344 }

	345

	346 _eat(TokenKind.RBRACE);

	347

	348 return new StyletDirective(name, productions, _makeSpan(start));

	349 default:

	350 _error('unknown directive, found $_peekToken', _peekToken.span);

	351 }

	352 }

	353 }

	354

	355 processRuleSet() {

	356 int start = _peekToken.start;

	357

	358 SelectorGroup selGroup = processSelectorGroup();

	359 if (selGroup != null) {

	360 return new RuleSet(selGroup, processDeclarations(), _makeSpan(start));

	361 }

	362 }

	363

	364 DeclarationGroup processDeclarations() {

	365 int start = _peekToken.start;

	366

	367 _eat(TokenKind.LBRACE);

	368

	369 List<Declaration> decls = [];

	370 do {

	371 Declaration decl = processDeclaration();

	372 if (decl != null) {

	373 decls.add(decl);

	374 }

	375 } while (_maybeEat(TokenKind.SEMICOLON));

	376

	377 _eat(TokenKind.RBRACE);

	378

	379 return new DeclarationGroup(decls, _makeSpan(start));

	380 }

	381

	382 SelectorGroup processSelectorGroup() {

	383 List<Selector> selectors = [];

	384 int start = _peekToken.start;

	385 do {

	386 Selector selector = processSelector();

	387 if (selector != null) {

	388 selectors.add(selector);

	389 }

	390 } while (_maybeEat(TokenKind.COMMA));

	391

	392 if (selectors.length > 0) {

	393 return new SelectorGroup(selectors, _makeSpan(start));

	394 }

	395 }

	396

	397 /* Return list of selectors

	398 *

	399 */

	400 processSelector() {

	401 List<SimpleSelectorSequence> simpleSequences = [];

	402 int start = _peekToken.start;

251 while (true) {	403 while (true) {

252 // First item is never descendant make sure it's COMBINATOR_NONE.	404 // First item is never descendant make sure it's COMBINATOR_NONE.

253 var selectorItem = simpleSelectorSequence(simpleSelectors.length == 0);	405 var selectorItem = simpleSelectorSequence(simpleSequences.length == 0);

254 if (selectorItem != null) {	406 if (selectorItem != null) {

255 simpleSelectors.add(selectorItem);	407 simpleSequences.add(selectorItem);

256 } else {	408 } else {

257 break;	409 break;

258 }	410 }

259 }	411 }

260	412

261 return simpleSelectors;	413 if (simpleSequences.length > 0) {

	414 return new Selector(simpleSequences, _makeSpan(start));

	415 }

262 }	416 }

263	417

264 simpleSelectorSequence(bool forceCombinatorNone) {	418 simpleSelectorSequence(bool forceCombinatorNone) {

	419 int start = _peekToken.start;

265 int combinatorType = TokenKind.COMBINATOR_NONE;	420 int combinatorType = TokenKind.COMBINATOR_NONE;

	421

266 switch (_peek()) {	422 switch (_peek()) {

267 case TokenKind.COMBINATOR_PLUS:	423 case TokenKind.PLUS:

268 _eat(TokenKind.COMBINATOR_PLUS);	424 _eat(TokenKind.PLUS);

269 combinatorType = TokenKind.COMBINATOR_PLUS;	425 combinatorType = TokenKind.COMBINATOR_PLUS;

270 break;	426 break;

271 case TokenKind.COMBINATOR_GREATER:	427 case TokenKind.GREATER:

272 _eat(TokenKind.COMBINATOR_GREATER);	428 _eat(TokenKind.GREATER);

273 combinatorType = TokenKind.COMBINATOR_GREATER;	429 combinatorType = TokenKind.COMBINATOR_GREATER;

274 break;	430 break;

275 case TokenKind.COMBINATOR_TILDE:	431 case TokenKind.TILDE:

276 _eat(TokenKind.COMBINATOR_TILDE);	432 _eat(TokenKind.TILDE);

277 combinatorType = TokenKind.COMBINATOR_TILDE;	433 combinatorType = TokenKind.COMBINATOR_TILDE;

278 break;	434 break;

279 }	435 }

280	436

281 // Check if WHITESPACE existed between tokens if so we're descendent.	437 // Check if WHITESPACE existed between tokens if so we're descendent.

282 if (combinatorType == TokenKind.COMBINATOR_NONE && !forceCombinatorNone) {	438 if (combinatorType == TokenKind.COMBINATOR_NONE && !forceCombinatorNone) {

283 if (this._previousToken != null &&	439 if (this._previousToken != null &&

284 this._previousToken.end != this._peekToken.start) {	440 this._previousToken.end != this._peekToken.start) {

285 combinatorType = TokenKind.COMBINATOR_DESCENDANT;	441 combinatorType = TokenKind.COMBINATOR_DESCENDANT;

286 }	442 }

287 }	443 }

288	444

289 return simpleSelector(combinatorType);	445 var simpleSel = simpleSelector();

	446 if (simpleSel != null) {

	447 return new SimpleSelectorSequence(simpleSel, _makeSpan(start),

	448 combinatorType);

	449 }

290 }	450 }

291	451

292 /**	452 /**

293 * Simple selector grammar:	453 * Simple selector grammar:

	454 *

294 * simple_selector_sequence	455 * simple_selector_sequence

295 * : [ type_selector \| universal ]	456 * : [ type_selector \| universal ]

296 * [ HASH \| class \| attrib \| pseudo \| negation ]*	457 * [ HASH \| class \| attrib \| pseudo \| negation ]*

297 * \| [ HASH \| class \| attrib \| pseudo \| negation ]+	458 * \| [ HASH \| class \| attrib \| pseudo \| negation ]+

298 * type_selector	459 * type_selector

299 * : [ namespace_prefix ]? element_name	460 * : [ namespace_prefix ]? element_name

300 * namespace_prefix	461 * namespace_prefix

301 * : [ IDENT \| '*' ]? '\|'	462 * : [ IDENT \| '*' ]? '\|'

302 * element_name	463 * element_name

303 * : IDENT	464 * : IDENT

304 * universal	465 * universal

305 * : [ namespace_prefix ]? '*'	466 * : [ namespace_prefix ]? '*'

306 * class	467 * class

307 * : '.' IDENT	468 * : '.' IDENT

308 */	469 */

309 simpleSelector(int combinator) {	470 simpleSelector() {

310 // TODO(terry): Nathan makes a good point parsing of namespace and element	471 // TODO(terry): Nathan makes a good point parsing of namespace and element

311 // are essentially the same (asterisk or identifier) other	472 // are essentially the same (asterisk or identifier) other

312 // than the error message for element. Should consolidate the	473 // than the error message for element. Should consolidate the

313 // code.	474 // code.

	475 // TODO(terry): Need to handle attribute namespace too.

314 var first;	476 var first;

315 int start = _peekToken.start;	477 int start = _peekToken.start;

316 switch (_peek()) {	478 switch (_peek()) {

317 case TokenKind.ASTERISK:	479 case TokenKind.ASTERISK:

318 // Mark as universal namespace.	480 // Mark as universal namespace.

319 var tok = _next();	481 var tok = _next();

320 first = new Wildcard(_makeSpan(tok.start));	482 first = new Wildcard(_makeSpan(tok.start));

321 break;	483 break;

322 case TokenKind.IDENTIFIER:	484 case TokenKind.IDENTIFIER:

323 int startIdent = _peekToken.start;	485 int startIdent = _peekToken.start;

324 first = identifier();	486 first = identifier();

325 break;	487 break;

326 }	488 }

327	489

328 if (first == null) {	490 if (_maybeEat(TokenKind.NAMESPACE)) {

329 // Check for HASH \| class \| attrib \| pseudo \| negation

330 return simpleSelectorTail(combinator);

331 }

332

333 // Could be a namespace?

334 var isNamespace = _maybeEat(TokenKind.NAMESPACE);

335 if (isNamespace) {

336 var element;	491 var element;

337 switch (_peek()) {	492 switch (_peek()) {

338 case TokenKind.ASTERISK:	493 case TokenKind.ASTERISK:

339 // Mark as universal element	494 // Mark as universal element

340 var tok = _next();	495 var tok = _next();

341 element = new Wildcard(_makeSpan(tok.start));	496 element = new Wildcard(_makeSpan(tok.start));

342 break;	497 break;

343 case TokenKind.IDENTIFIER:	498 case TokenKind.IDENTIFIER:

344 element = identifier();	499 element = identifier();

345 break;	500 break;

346 default:	501 default:

347 _error('expected element name or universal(*), but found $_peekToken',	502 _error('expected element name or universal(*), but found $_peekToken',

348 _peekToken.span);	503 _peekToken.span);

349 }	504 }

350	505

351 return new NamespaceSelector(first,	506 return new NamespaceSelector(first,

352 new ElementSelector(element, element.span),	507 new ElementSelector(element, element.span), _makeSpan(start));

353 _makeSpan(start), combinator);	508 } else if (first != null) {

	509 return new ElementSelector(first, _makeSpan(start));

354 } else {	510 } else {

355 return new ElementSelector(first, _makeSpan(start), combinator);	511 // Check for HASH \| class \| attrib \| pseudo \| negation

356 }	512 return simpleSelectorTail();

357 }	513 }

358	514 }

359 simpleSelectorTail(int combinator) {	515

	516 simpleSelectorTail() {

360 // Check for HASH \| class \| attrib \| pseudo \| negation	517 // Check for HASH \| class \| attrib \| pseudo \| negation

361 int start = _peekToken.start;	518 int start = _peekToken.start;

362 switch (_peek()) {	519 switch (_peek()) {

363 case TokenKind.HASH:	520 case TokenKind.HASH:

364 _eat(TokenKind.HASH);	521 _eat(TokenKind.HASH);

365 return new IdSelector(identifier(), _makeSpan(start), combinator);	522 return new IdSelector(identifier(), _makeSpan(start));

366 case TokenKind.DOT:	523 case TokenKind.DOT:

367 _eat(TokenKind.DOT);	524 _eat(TokenKind.DOT);

368 return new ClassSelector(identifier(), _makeSpan(start), combinator);	525 return new ClassSelector(identifier(), _makeSpan(start));

369 case TokenKind.PSEUDO:	526 case TokenKind.COLON:

370 // :pseudo-class ::pseudo-element	527 // :pseudo-class ::pseudo-element

371 // TODO(terry): '::' should be token.	528 // TODO(terry): '::' should be token.

372 _eat(TokenKind.PSEUDO);	529 _eat(TokenKind.COLON);

373 bool pseudoClass = _peek() != TokenKind.PSEUDO;	530 bool pseudoClass = _peek() != TokenKind.COLON;

374 var name = identifier();	531 var name = identifier();

375 // TODO(terry): Need to handle specific pseudo class/element name and	532 // TODO(terry): Need to handle specific pseudo class/element name and

376 // backward compatible names that are : as well as :: as well as	533 // backward compatible names that are : as well as :: as well as

377 // parameters.	534 // parameters.

378 return pseudoClass ?	535 return pseudoClass ?

379 new PseudoClassSelector(name, _makeSpan(start), combinator) :	536 new PseudoClassSelector(name, _makeSpan(start)) :

380 new PseudoElementSelector(name, _makeSpan(start), combinator);	537 new PseudoElementSelector(name, _makeSpan(start));

381	538 case TokenKind.LBRACK:

382 // TODO(terry): attrib, negation.	539 return processAttribute();

383 }	540 }

	541 }

	542

	543 // Attribute grammar:

	544 //

	545 // attributes :

	546 // '[' S* IDENT S* [ ATTRIB_MATCHES S* [ IDENT \| STRING ] S* ]? ']'

	547 //

	548 // ATTRIB_MATCHES :

	549 // [ '=' \| INCLUDES \| DASHMATCH \| PREFIXMATCH \| SUFFIXMATCH \| SUBSTRMATCH ]

	550 //

	551 // INCLUDES: '~='

	552 //

	553 // DASHMATCH: '\|='

	554 //

	555 // PREFIXMATCH: '^='

	556 //

	557 // SUFFIXMATCH: '$='

	558 //

	559 // SUBSTRMATCH: '*='

	560 //

	561 //

	562 processAttribute() {

	563 int start = _peekToken.start;

	564

	565 if (_maybeEat(TokenKind.LBRACK)) {
	nweiz 2012/01/04 19:05:41 if (!...) return;
	566 var attrName = identifier();

	567

	568 int op = TokenKind.NO_MATCH;
	nweiz 2012/01/04 19:05:41 This would be clearer if you set op to NO_MATCH in the default clause of the switch statement.
	569 switch (_peek()) {

	570 case TokenKind.EQUALS:

	571 case TokenKind.INCLUDES: // ~=

	572 case TokenKind.DASH_MATCH: // \|=

	573 case TokenKind.PREFIX_MATCH: // ^=

	574 case TokenKind.SUFFIX_MATCH: // $=

	575 case TokenKind.SUBSTRING_MATCH: // *=

	576 op = _peek();

	577 _next();

	578 break;

	579 }

	580

	581 String value;

	582 if (op != TokenKind.NO_MATCH) {

	583 // Operator hit so we require a value too.

	584 if (_peekIdentifier()) {

	585 value = identifier();

	586 } else {

	587 value = processQuotedString(false);

	588 }

	589

	590 if (value == null) {

	591 _error('expected attribute value string or ident', _peekToken.span);

	592 }

	593 }

	594

	595 _eat(TokenKind.RBRACK);

	596

	597 return new AttributeSelector(attrName, op, value, _makeSpan(start));

	598 }

	599 }

	600

	601 // Declaration grammar:

	602 //

	603 // declaration: property ':' expr prio?

	604 //

	605 // property: IDENT

	606 // prio: !important

	607 // expr: (see processExpr)

	608 //

	609 processDeclaration() {

	610 Declaration decl;

	611

	612 int start = _peekToken.start;

	613

	614 // IDENT ':' expr '!important'?

	615 if (TokenKind.isIdentifier(_peekToken.kind)) {
	nweiz 2012/01/04 19:05:41 if (!...) return null;
	616 var propertyIdent = identifier();

	617 _eat(TokenKind.COLON);

	618

	619 decl = new Declaration(propertyIdent, processExpr(), _makeSpan(start));

	620

	621 // Handle !important (prio)

	622 decl.important = _maybeEat(TokenKind.IMPORTANT);

	623 }

	624

	625 return decl;

	626 }

	627

	628 // Expression grammar:

	629 //

	630 // expression: term [ operator? term]*

	631 //

	632 // operator: '/' \| ','

	633 // term: (see processTerm)

	634 //

	635 processExpr() {

	636 int start = _peekToken.start;

	637 Expressions expressions = new Expressions(_makeSpan(start));

	638

	639 bool keepGoing = true;

	640 var expr;

	641 while (keepGoing && (expr = processTerm()) != null) {

	642 var op;

	643

	644 int opStart = _peekToken.start;

	645

	646 switch (_peek()) {

	647 case TokenKind.SLASH:

	648 op = new OperatorSlash(_makeSpan(opStart));

	649 break;

	650 case TokenKind.COMMA:

	651 op = new OperatorComma(_makeSpan(opStart));

	652 break;

	653 }

	654

	655 if (expr != null) {

	656 expressions.add(expr);

	657 } else {

	658 keepGoing = false;

	659 }

	660

	661 if (op != null) {

	662 expressions.add(op);

	663 _next();

	664 }

	665 }

	666

	667 return expressions;

	668 }

	669

	670 // Term grammar:

	671 //

	672 // term:

	673 // unary_operator?

	674 // [ term_value ]

	675 // \| STRING S* \| IDENT S* \| URI S* \| UNICODERANGE S* \| hexcolor

	676 //

	677 // term_value:

	678 // NUMBER S* \| PERCENTAGE S* \| LENGTH S* \| EMS S* \| EXS S* \| ANGLE S* \|

	679 // TIME S* \| FREQ S* \| function

	680 //

	681 // NUMBER: {num}

	682 // PERCENTAGE: {num}%

	683 // LENGTH: {num}['px' \| 'cm' \| 'mm' \| 'in' \| 'pt' \| 'pc']

	684 // EMS: {num}'em'

	685 // EXS: {num}'ex'

	686 // ANGLE: {num}['deg' \| 'rad' \| 'grad']

	687 // TIME: {num}['ms' \| 's']

	688 // FREQ: {num}['hz' \| 'khz']

	689 // function: IDENT '(' expr ')'

	690 //

	691 processTerm() {

	692 int start = _peekToken.start;

	693 lang.Token t; // token for term's value

	694 var value; // value of term (numeric values)

	695

	696 var unary = "";

	697

	698 switch (_peek()) {

	699 case TokenKind.HASH:

	700 this._eat(TokenKind.HASH);

	701 String hexText;

	702 if (_peekKind(TokenKind.INTEGER)) {
	nweiz 2012/01/04 19:05:41 This seems really nasty. Shouldn't you be parsing HASH as a token unto itself as per the CSS grammar?
	703 String hexText1 = _peekToken.text;

	704 _next();

	705 if (_peekIdentifier()) {

	706 hexText = '${hexText1}${identifier().name}';

	707 } else {

	708 hexText = hexText1;

	709 }

	710 } else if (_peekIdentifier()) {

	711 hexText = identifier().name;

	712 } else {

	713 _errorExpected("hex number");

	714 }

	715

	716 try {

	717 int hexValue = parseHex(hexText);

	718 return new HexColorTerm(hexValue, hexText, _makeSpan(start));

	719 } catch (HexNumberException hne) {

	720 _error('Bad hex number', _makeSpan(start));

	721 }

	722 case TokenKind.INTEGER:

	723 t = _next();

	724 value = Math.parseInt("${unary}${t.text}");

	725 break;

	726 case TokenKind.DOUBLE:

	727 t = _next();

	728 value = Math.parseDouble("${unary}${t.text}");

	729 break;

	730 case TokenKind.SINGLE_QUOTE:

	731 case TokenKind.DOUBLE_QUOTE:

	732 value = processQuotedString(false);

	733 value = '"${value}"';
	nweiz 2012/01/04 19:05:41 This will break if the original string was single- This will break if the original string was single-quoted and included a double quote. In order to preserve the original structure of the CSS, you should probably keep track of whether the original string was single- or double-quoted.
	734 return new LiteralTerm(value, value, _makeSpan(start));

	735 case TokenKind.LPAREN:
	nweiz 2012/01/04 19:05:41 Is there somewhere the semantics of additions like Is there somewhere the semantics of additions like GroupTerm and ItemTerm are explained?
	736 _next();

	737

	738 GroupTerm group = new GroupTerm(_makeSpan(start));

	739

	740 do {

	741 var term = processTerm();

	742 if (term != null && term is LiteralTerm) {

	743 group.add(term);

	744 }

	745 } while (!_maybeEat(TokenKind.RPAREN));

	746

	747 return group;

	748 case TokenKind.LBRACK:

	749 _next();

	750

	751 var term = processTerm();

	752 if (!(term is NumberTerm)) {

	753 _error('Expecting a positive number', _makeSpan(start));

	754 }

	755

	756 _eat(TokenKind.RBRACK);

	757

	758 return new ItemTerm(term.value, term.text, _makeSpan(start));

	759 case TokenKind.IDENTIFIER:

	760 var nameValue = identifier(); // Snarf up the ident we'll remap, maybe.

	761

	762 if (_maybeEat(TokenKind.LPAREN)) {

	763 // FUNCTION

	764 return processFunction(nameValue);

	765 } else {

	766 // What kind of identifier is it?

	767 int value;

	768 try {

	769 // Named color?

	770 value = TokenKind.matchColorName(nameValue.name);

	771

	772 // Yes, process the color as an RGB value.

	773 String rgbColor = TokenKind.decimalToHex(value);

	774 int value;

	775 try {

	776 value = parseHex(rgbColor);

	777 } catch (HexNumberException hne) {

	778 _error('Bad hex number', _makeSpan(start));

	779 }

	780 return new HexColorTerm(value, rgbColor, _makeSpan(start));

	781 } catch (var error) {

	782 if (error is NoColorMatchException) {

	783 // Other named things to match with validator?

	784 // TODO(terry): TBD

	785 // _error('Unknown property value ${error.name}', _makeSpan(start));

	786

	787 value = nameValue.name;

	788 print('Warning: unknown property value ${error.name}');
	nweiz 2012/01/04 19:05:41 I don't understand this warning. There are tons of I don't understand this warning. There are tons of identifiers other than color names that are valid in properties. Syntactically, every identifier is valid.
	789 return new LiteralTerm(nameValue, nameValue.name, _makeSpan(start));

	790

	791 }

	792 }

	793 }

	794 }

	795

	796 var term;

	797 var unitType = this._peek();

	798

	799 switch (unitType) {
	nweiz 2012/01/04 19:05:41 Manually enumerating all the possible unit types, Manually enumerating all the possible unit types, and the semantics of the numbers that use them, seems like a huge amount of effort that buys you nothing. The way Sass does it is to support any identifier (as well as "%") as a unit. This seems much more robust and future-proof, while also requiring much less code.
	800 case TokenKind.UNIT_EM:

	801 term = new EmTerm(value, t.text, _makeSpan(start));

	802 _next(); // Skip the unit

	803 break;

	804 case TokenKind.UNIT_EX:

	805 term = new ExTerm(value, t.text, _makeSpan(start));

	806 _next(); // Skip the unit

	807 break;

	808 case TokenKind.UNIT_LENGTH_PX:

	809 case TokenKind.UNIT_LENGTH_CM:

	810 case TokenKind.UNIT_LENGTH_MM:

	811 case TokenKind.UNIT_LENGTH_IN:

	812 case TokenKind.UNIT_LENGTH_PT:

	813 case TokenKind.UNIT_LENGTH_PC:

	814 term = new LengthTerm(value, t.text, _makeSpan(start), unitType);

	815 _next(); // Skip the unit

	816 break;

	817 case TokenKind.UNIT_ANGLE_DEG:

	818 case TokenKind.UNIT_ANGLE_RAD:

	819 case TokenKind.UNIT_ANGLE_GRAD:

	820 term = new AngleTerm(value, t.text, _makeSpan(start), unitType);

	821 _next(); // Skip the unit

	822 break;

	823 case TokenKind.UNIT_TIME_MS:

	824 case TokenKind.UNIT_TIME_S:

	825 term = new TimeTerm(value, t.text, _makeSpan(start), unitType);

	826 _next(); // Skip the unit

	827 break;

	828 case TokenKind.UNIT_FREQ_HZ:

	829 case TokenKind.UNIT_FREQ_KHZ:

	830 term = new FreqTerm(value, t.text, _makeSpan(start), unitType);

	831 _next(); // Skip the unit

	832 break;

	833 case TokenKind.PERCENT:

	834 term = new PercentageTerm(value, t.text, _makeSpan(start));

	835 _next(); // Skip the %

	836 break;

	837 case TokenKind.UNIT_FRACTION:

	838 term = new FractionTerm(value, t.text, _makeSpan(start));

	839 _next(); // Skip the unit

	840 break;

	841 default:

	842 if (value != null) {

	843 term = new NumberTerm(value, t.text, _makeSpan(start));

	844 }

	845 }

	846

	847 return term;

	848 }

	849

	// Collects the text of a quoted string or, when [urlString] is true, of a
	// possibly unquoted url(...) argument, by concatenating the text of every
	// token up to the terminator.
	//
	// The terminator is the matching quote when the current token is
	// SINGLE_QUOTE or DOUBLE_QUOTE; otherwise it is RPAREN for a url string.
	// With no opening quote and [urlString] false an 'unexpected string' error
	// is reported. The closing quote is consumed when present; a terminating
	// RPAREN is left for the caller. Returns the collected text without quotes.
	//
	// NOTE(review, nweiz 2012/01/04): Why are you parsing strings in the parser
	// and not the tokenizer? This seems like not only a violation of layering
	// (strings are conceptually a single token) but also very error-prone, as
	// you rely on everything in the string being tokenizable in order for the
	// parse to work.
	processQuotedString([bool urlString = false]) {
	  int start = _peekToken.start;

	  // URI term sucks up everything inside of quotes(' or ") or between parens.
	  // -1 matches no token kind checked here, so scanning runs to END_OF_FILE.
	  int stopToken = urlString ? TokenKind.RPAREN : -1;
	  bool quoted = false;
	  switch (_peek()) {
	    case TokenKind.SINGLE_QUOTE:
	      stopToken = TokenKind.SINGLE_QUOTE;
	      quoted = true;
	      _next();    // Skip the SINGLE_QUOTE.
	      break;
	    case TokenKind.DOUBLE_QUOTE:
	      stopToken = TokenKind.DOUBLE_QUOTE;
	      quoted = true;
	      _next();    // Skip the DOUBLE_QUOTE.
	      break;
	    default:
	      // For a url string stopToken is already RPAREN.
	      if (!urlString) {
	        _error('unexpected string', _makeSpan(start));
	      }
	  }

	  StringBuffer stringValue = new StringBuffer();

	  // Gobble up everything until we hit our stop token.
	  while (_peek() != stopToken && _peek() != TokenKind.END_OF_FILE) {
	    stringValue.add(_next().text);
	  }

	  // Skip the closing quote only if it is really there; an unterminated
	  // string stops at END_OF_FILE and must not consume a token beyond it.
	  if (quoted && _peek() == stopToken) {
	    _next();
	  }

	  return stringValue.toString();
	}

	887

	888 // Function grammar:

	889 //

	890 // function: IDENT '(' expr ')'

	891 //

	// Parses what follows `IDENT(`; [func] is the identifier that was read
	// before the opening parenthesis.
	//
	// Returns a UriTerm for url(...), a FunctionTerm for any other name, and
	// null for calc(...), which is not implemented (nothing is consumed then).
	//
	// NOTE(review, nweiz 2012/01/04): It seems wrong that url() is being parsed
	// in processFunction when it's not semantically a function and it only
	// superficially resembles one syntactically. It seems much cleaner to me to
	// tokenize each url().
	processFunction(Identifier func) {
	  int start = _peekToken.start;
	  String name = func.name;

	  if (name == 'url') {
	    // URI term sucks up everything inside of quotes(' or ") or between
	    // parens.
	    String urlParam = processQuotedString(true);

	    // TODO(terry): Better error message and checking for mismatched quotes.
	    if (_peek() == TokenKind.END_OF_FILE) {
	      _error("problem parsing URI", _peekToken.span);
	    }

	    // Consume the closing parenthesis when it is present.
	    if (_peek() == TokenKind.RPAREN) {
	      _next();
	    }

	    return new UriTerm(urlParam, _makeSpan(start));
	  }

	  if (name == 'calc') {
	    // TODO(terry): Implement expression handling...
	    return null;
	  }

	  // Any other function: an expression followed by ')'.
	  var expr = processExpr();
	  if (!_maybeEat(TokenKind.RPAREN)) {
	    _error("problem parsing function expected ), ", _peekToken.span);
	  }

	  return new FunctionTerm(name, name, expr, _makeSpan(start));
	}

385	926

// Consumes the next token and wraps its text in an Identifier node.
//
// Reports an error when the token's kind is not an identifier kind; if _error
// returns, an Identifier is still built from that token's text.
identifier() {
  var tok = _next();

  bool isIdent = TokenKind.isIdentifier(tok.kind);
  if (!isIdent) {
    _error('expected identifier, but found $tok', tok.span);
  }

  var name = tok.text;
  var span = _makeSpan(tok.start);
  return new Identifier(name, span);
}

	935

	936 // TODO(terry): Move this to base <= 36 and into shared code.

	// Returns the numeric value (0..15) of the hexadecimal digit whose
	// character code is [c], or -1 when [c] is not one of 0-9, a-f or A-F.
	static int _hexDigit(int c) {
	  if (c >= 48/*0*/ && c <= 57/*9*/) {
	    return c - 48;
	  } else if (c >= 97/*a*/ && c <= 102/*f*/) {
	    return c - 87;    // 'a' (97) maps to 10.
	  } else if (c >= 65/*A*/ && c <= 70/*F*/) {
	    return c - 55;    // 'A' (65) maps to 10.
	  } else {
	    return -1;
	  }
	}

	948

	// Parses [hex], a string of hexadecimal digits without any prefix, into its
	// integer value.
	//
	// Throws HexNumberException when [hex] is null, empty, or contains a
	// character that _hexDigit does not accept.
	static int parseHex(String hex) {
	  // No digits is not a number. Reporting it the same way as a bad digit
	  // lets callers that catch HexNumberException handle it, instead of
	  // failing on a null dereference or silently yielding 0.
	  if (hex == null || hex.length == 0) {
	    throw new HexNumberException();
	  }

	  var result = 0;

	  for (int i = 0; i < hex.length; i++) {
	    var digit = _hexDigit(hex.charCodeAt(i));
	    if (digit < 0) {
	      throw new HexNumberException();
	    }
	    // Shift the accumulated value one hex digit left and append this one.
	    result = (result << 4) + digit;
	  }

	  return result;
	}

394 }	962 }

	963

	/** Thrown by Parser.parseHex when its input is not a hex number. */
	class HexNumberException implements Exception {
	  HexNumberException();

	  // Gives an uncaught instance a readable message.
	  String toString() => 'HexNumberException';
	}

	968

OLD	NEW

« utils/css/generate.dart ('K') | « utils/css/generate.dart ('k') | utils/css/test.dart » ('j') | utils/css/tree.dart » ('J')