Chromium Code Reviews

Diff: utils/css/tokenizer.dart

Issue 8937017: New CSS parser written in Dart to replace pyparser (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Put back for DartC (created 9 years ago)
Unified view of the change ('-' lines removed, '+' lines added):

 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 class Tokenizer extends lang.TokenizerBase {
   TokenKind cssTokens;

   bool _selectorParsing;

   Tokenizer(lang.SourceFile source, bool skipWhitespace, [int index = 0])
(... 26 unchanged lines skipped ...)
       case cssTokens.tokens[TokenKind.SPACE]:
       case cssTokens.tokens[TokenKind.TAB]:
       case cssTokens.tokens[TokenKind.NEWLINE]:
       case cssTokens.tokens[TokenKind.RETURN]:
         return finishWhitespace();
       case cssTokens.tokens[TokenKind.END_OF_FILE]:
         return _finishToken(TokenKind.END_OF_FILE);
       case cssTokens.tokens[TokenKind.AT]:
         return _finishToken(TokenKind.AT);
       case cssTokens.tokens[TokenKind.DOT]:
-        return _finishToken(TokenKind.DOT);
+        int start = _startIndex;             // Remember where the '.' started.
+        if (maybeEatDigit()) {
+          // Looks like a number: a dot followed by digit(s).
+          lang.Token num = finishNumber();
+          if (num.kind == TokenKind.INTEGER) {
+            // It's a number, but it's preceded by a dot, so make it a double.
+            _startIndex = start;
+            return _finishToken(TokenKind.DOUBLE);
+          } else {
+            // Don't allow a dot followed by a double (e.g., '..1').
+            return _errorToken();
+          }
+        } else {
+          // It's really a dot.
+          return _finishToken(TokenKind.DOT);
+        }
+      case cssTokens.tokens[TokenKind.LPAREN]:
+        return _finishToken(TokenKind.LPAREN);
+      case cssTokens.tokens[TokenKind.RPAREN]:
+        return _finishToken(TokenKind.RPAREN);
       case cssTokens.tokens[TokenKind.LBRACE]:
         return _finishToken(TokenKind.LBRACE);
       case cssTokens.tokens[TokenKind.RBRACE]:
         return _finishToken(TokenKind.RBRACE);
+      case cssTokens.tokens[TokenKind.LBRACK]:
+        return _finishToken(TokenKind.LBRACK);
+      case cssTokens.tokens[TokenKind.RBRACK]:
+        return _finishToken(TokenKind.RBRACK);
       case cssTokens.tokens[TokenKind.HASH]:
         return _finishToken(TokenKind.HASH);
-      case cssTokens.tokens[TokenKind.COMBINATOR_PLUS]:
-        return _finishToken(TokenKind.COMBINATOR_PLUS);
-      case cssTokens.tokens[TokenKind.COMBINATOR_GREATER]:
-        return _finishToken(TokenKind.COMBINATOR_GREATER);
-      case cssTokens.tokens[TokenKind.COMBINATOR_TILDE]:
-        return _finishToken(TokenKind.COMBINATOR_TILDE);
+      case cssTokens.tokens[TokenKind.PLUS]:
+        if (maybeEatDigit()) {
+          return finishNumber();
+        } else {
+          return _finishToken(TokenKind.PLUS);
+        }
+      case cssTokens.tokens[TokenKind.MINUS]:
+        if (maybeEatDigit()) {
+          return finishNumber();
+        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
+          return this.finishIdentifier();
+        } else {
+          return _finishToken(TokenKind.MINUS);
+        }
+      case cssTokens.tokens[TokenKind.GREATER]:
+        return _finishToken(TokenKind.GREATER);
+      case cssTokens.tokens[TokenKind.TILDE]:
+        if (_maybeEatChar(cssTokens.tokens[TokenKind.EQUALS])) {
+          return _finishToken(TokenKind.INCLUDES);          // ~=
+        } else {
+          return _finishToken(TokenKind.TILDE);
+        }
       case cssTokens.tokens[TokenKind.ASTERISK]:
-        return _finishToken(TokenKind.ASTERISK);
+        if (_maybeEatChar(cssTokens.tokens[TokenKind.EQUALS])) {
+          return _finishToken(TokenKind.SUBSTRING_MATCH);   // *=
+        } else {
+          return _finishToken(TokenKind.ASTERISK);
+        }
       case cssTokens.tokens[TokenKind.NAMESPACE]:
         return _finishToken(TokenKind.NAMESPACE);
-      case cssTokens.tokens[TokenKind.PSEUDO]:
-        return _finishToken(TokenKind.PSEUDO);
+      case cssTokens.tokens[TokenKind.COLON]:
+        return _finishToken(TokenKind.COLON);
       case cssTokens.tokens[TokenKind.COMMA]:
         return _finishToken(TokenKind.COMMA);
-
+      case cssTokens.tokens[TokenKind.SEMICOLON]:
+        return _finishToken(TokenKind.SEMICOLON);
+      case cssTokens.tokens[TokenKind.PERCENT]:
+        return _finishToken(TokenKind.PERCENT);
+      case cssTokens.tokens[TokenKind.SINGLE_QUOTE]:
+        return _finishToken(TokenKind.SINGLE_QUOTE);
+      case cssTokens.tokens[TokenKind.DOUBLE_QUOTE]:
+        return _finishToken(TokenKind.DOUBLE_QUOTE);
+      case cssTokens.tokens[TokenKind.SLASH]:
+        if (_maybeEatChar(cssTokens.tokens[TokenKind.ASTERISK])) {
+          return finishMultiLineComment();
+        } else {
+          return _finishToken(TokenKind.SLASH);
+        }
+      case cssTokens.tokens[TokenKind.LESS]:                // <!--
+        if (_maybeEatChar(cssTokens.tokens[TokenKind.BANG]) &&
+            _maybeEatChar(cssTokens.tokens[TokenKind.MINUS]) &&
+            _maybeEatChar(cssTokens.tokens[TokenKind.MINUS])) {
+          return finishMultiLineComment();
+        } else {
+          return _finishToken(TokenKind.LESS);
+        }
+      case cssTokens.tokens[TokenKind.EQUALS]:
+        return _finishToken(TokenKind.EQUALS);
+      case cssTokens.tokens[TokenKind.OR]:
+        if (_maybeEatChar(cssTokens.tokens[TokenKind.EQUALS])) {
+          return _finishToken(TokenKind.DASH_MATCH);        // |=
+        } else {
+          return _finishToken(TokenKind.OR);
+        }
+      case cssTokens.tokens[TokenKind.CARET]:
+        if (_maybeEatChar(cssTokens.tokens[TokenKind.EQUALS])) {
+          return _finishToken(TokenKind.PREFIX_MATCH);      // ^=
+        } else {
+          return _finishToken(TokenKind.CARET);
+        }
+      case cssTokens.tokens[TokenKind.DOLLAR]:
+        if (_maybeEatChar(cssTokens.tokens[TokenKind.EQUALS])) {
+          return _finishToken(TokenKind.SUFFIX_MATCH);      // $=
+        } else {
+          return _finishToken(TokenKind.DOLLAR);
+        }
+      case cssTokens.tokens[TokenKind.BANG]:
+        lang.Token tok = finishIdentifier();
+        return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
       default:
-        if (isIdentifierStart(ch)) {
+        if (TokenizerHelpers.isIdentifierStart(ch)) {
           return this.finishIdentifier();
         } else if (isDigit(ch)) {
           return this.finishNumber();
         } else {
           return _errorToken();
         }
     }
   }

   // TODO(jmesserly): we need a way to emit human readable error messages from
   // the tokenizer.
   lang.Token _errorToken() {
     return _finishToken(TokenKind.ERROR);
   }

   int getIdentifierKind() {
-    return TokenKind.IDENTIFIER;
+    // Is the identifier a unit type?
+    int tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
+    if (tokId == -1) {
+      // No, is it a directive?
+      tokId = TokenKind.matchDirectives(_text, _startIndex, _index - _startIndex);
+    }
+    if (tokId == -1) {
+      tokId = (_text.substring(_startIndex, _index) == '!important') ?
+          TokenKind.IMPORTANT : -1;
+    }
+
+    return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
   }

   // Need to override so CSS version of isIdentifierPart is used.
   lang.Token finishIdentifier() {
     while (_index < _text.length) {
-      if (!TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index++))) {
-        _index--;
+      if (!TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index))) {
         break;
+      } else {
+        _index += 1;
       }
     }
-    int kind = getIdentifierKind();
     if (_interpStack != null && _interpStack.depth == -1) {
       _interpStack.depth = 0;
     }
+    int kind = getIdentifierKind();
     if (kind == TokenKind.IDENTIFIER) {
       return _finishToken(TokenKind.IDENTIFIER);
     } else {
       return _finishToken(kind);
     }
   }
+
+  lang.Token finishImportant() {
+
+  }
+
+  lang.Token finishNumber() {
+    eatDigits();
+
+    if (_peekChar() == 46/*.*/) {
+      // Handle the case of 1.toString().
+      _nextChar();
+      if (isDigit(_peekChar())) {
+        eatDigits();
+        return _finishToken(TokenKind.DOUBLE);
+      } else {
+        _index -= 1;
+      }
+    }
+
+    return _finishToken(TokenKind.INTEGER);
+  }
+
+  bool maybeEatDigit() {
+    if (_index < _text.length && isDigit(_text.charCodeAt(_index))) {
+      _index += 1;
+      return true;
+    }
+    return false;
+  }
+
+  void eatHexDigits() {
+    while (_index < _text.length) {
+      if (isHexDigit(_text.charCodeAt(_index))) {
+        _index += 1;
+      } else {
+        return;
+      }
+    }
+  }
+
+  bool maybeEatHexDigit() {
+    if (_index < _text.length && isHexDigit(_text.charCodeAt(_index))) {
+      _index += 1;
+      return true;
+    }
+    return false;
+  }
+
+  lang.Token finishMultiLineComment() {
+    while (true) {
+      int ch = _nextChar();
+      if (ch == 0) {
+        return _finishToken(TokenKind.INCOMPLETE_COMMENT);
+      } else if (ch == 42/*'*'*/) {
+        if (_maybeEatChar(47/*'/'*/)) {
+          if (_skipWhitespace) {
+            return next();
+          } else {
+            return _finishToken(TokenKind.COMMENT);
+          }
+        }
+      } else if (ch == cssTokens.tokens[TokenKind.MINUS]) {
+        /* Check for the closing delimiter of an HTML comment --> (CDC). */
+        if (_maybeEatChar(cssTokens.tokens[TokenKind.MINUS])) {
+          if (_maybeEatChar(cssTokens.tokens[TokenKind.GREATER])) {
+            if (_skipWhitespace) {
+              return next();
+            } else {
+              return _finishToken(TokenKind.HTML_COMMENT);
+            }
+          }
+        }
+      }
+    }
+    return _errorToken();
+  }
 }

 /** Static helper methods. */
 class TokenizerHelpers {
   static bool isIdentifierStart(int c) =>
-      lang.TokenizerHelpers.isIdentifierStart(c) || c == 95 /*_*/;
+      lang.TokenizerHelpers.isIdentifierStart(c) || c == 95 /*_*/ ||
+      c == 45; /*-*/

   static bool isDigit(int c) => lang.TokenizerHelpers.isDigit(c);

   static bool isHexDigit(int c) => lang.TokenizerHelpers.isHexDigit(c);

   static bool isWhitespace(int c) => lang.TokenizerHelpers.isWhitespace(c);

   static bool isIdentifierPart(int c) =>
       lang.TokenizerHelpers.isIdentifierPart(c) || c == 45 /*-*/;
 }
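Reviewer note: for anyone who wants to sanity-check the new '.' handling in next() without building the parser, here is a small standalone Dart sketch of that rule only. The function name, string-based return values, and examples are hypothetical and are not part of this change; it mirrors the maybeEatDigit()/finishNumber() path added above, where '.' followed by digits becomes a DOUBLE, a dot followed by a fractional number is rejected, and a lone '.' stays a DOT.

// Standalone sketch (hypothetical helper, not in this patch) of the new
// '.'-handling rule in next().
String classifyDotToken(String text, int index) {
  // Caller guarantees text[index] is '.'.
  bool isDigit(String ch) => '0123456789'.contains(ch);
  int i = index + 1;
  if (i < text.length && isDigit(text[i])) {
    while (i < text.length && isDigit(text[i])) i++;
    // finishNumber() would return DOUBLE here if another '.' plus digits
    // follow, which the tokenizer treats as an error (dot followed by a double).
    bool nestedDouble =
        i + 1 < text.length && text[i] == '.' && isDigit(text[i + 1]);
    return nestedDouble ? 'ERROR' : 'DOUBLE';
  }
  return 'DOT';
}

main() {
  print(classifyDotToken('.5em', 0));   // DOUBLE
  print(classifyDotToken('.1.2', 0));   // ERROR
  print(classifyDotToken('.foo', 0));   // DOT
}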