Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(481)

Side by Side Diff: pkg/csslib/lib/src/tokenizer.dart

Issue 23168002: move csslib into dart svn (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « pkg/csslib/lib/src/token.dart ('k') | pkg/csslib/lib/src/tokenizer_base.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 part of csslib.parser;
6
/**
 * CSS-specific tokenizer built on top of [TokenizerBase].
 *
 * Each call to [next] scans one token starting at the current index and
 * returns it. The remaining public methods are scanning helpers used by
 * [next].
 */
class Tokenizer extends TokenizerBase {
  /** U+ prefix for unicode characters. */
  final UNICODE_U = 'U'.codeUnitAt(0);
  final UNICODE_LOWER_U = 'u'.codeUnitAt(0);
  final UNICODE_PLUS = '+'.codeUnitAt(0);

  final QUESTION_MARK = '?'.codeUnitAt(0);

  /** CDATA keyword. */
  final List CDATA_NAME = 'CDATA'.codeUnits;

  /**
   * Creates a tokenizer over [text]; all arguments are forwarded unchanged to
   * the [TokenizerBase] constructor.
   */
  Tokenizer(SourceFile file, String text, bool skipWhitespace,
      [int index = 0])
      : super(file, text, skipWhitespace, index);

  /**
   * Scans and returns the next token.
   *
   * When [unicodeRange] is true the tokenizer is positioned inside a unicode
   * range (after a `U+` prefix): a minus sign is always returned as a MINUS
   * operator and any other character is scanned as a hex number
   * (HEX_INTEGER) or a wildcard range (HEX_RANGE); anything else is an ERROR
   * token.
   */
  Token next({unicodeRange: false}) {
    // Keep track of our starting position.
    _startIndex = _index;

    int ch;
    ch = _nextChar();
    switch (ch) {
      case TokenChar.NEWLINE:
      case TokenChar.RETURN:
      case TokenChar.SPACE:
      case TokenChar.TAB:
        return finishWhitespace();
      case TokenChar.END_OF_FILE:
        return _finishToken(TokenKind.END_OF_FILE);
      case TokenChar.AT:
        int peekCh = _peekChar();
        if (TokenizerHelpers.isIdentifierStart(peekCh)) {
          var oldIndex = _index;
          var oldStartIndex = _startIndex;

          // Scan the name after the '@' as if it were its own token so it
          // can be matched against the directive tables.
          _startIndex = _index;
          ch = _nextChar();
          // Called only to advance _index past the name; the token it
          // returns is not needed.
          this.finishIdentifier(ch);

          // Is it a directive?
          int tokId = TokenKind.matchDirectives(_text, _startIndex,
              _index - _startIndex);
          if (tokId == -1) {
            // No, is it a margin directive?
            tokId = TokenKind.matchMarginDirectives(_text, _startIndex,
                _index - _startIndex);
          }

          if (tokId != -1) {
            return _finishToken(tokId);
          } else {
            // Didn't find a CSS directive or margin directive so the @name is
            // probably the Less definition '@name: value_variable_definition'.
            // Rewind so that only the '@' itself is consumed.
            _startIndex = oldStartIndex;
            _index = oldIndex;
          }
        }
        return _finishToken(TokenKind.AT);
      case TokenChar.DOT:
        int start = _startIndex; // Start where the dot started.
        if (maybeEatDigit()) {
          // Looks like a number: dot followed by digit(s).
          Token number = finishNumber();
          if (number.kind == TokenKind.INTEGER) {
            // It's a number but it's preceded by a dot, so make it a double.
            _startIndex = start;
            return _finishToken(TokenKind.DOUBLE);
          } else {
            // Don't allow dot followed by a double (e.g, '..1').
            return _errorToken();
          }
        }
        // It's really a dot.
        return _finishToken(TokenKind.DOT);
      case TokenChar.LPAREN:
        return _finishToken(TokenKind.LPAREN);
      case TokenChar.RPAREN:
        return _finishToken(TokenKind.RPAREN);
      case TokenChar.LBRACE:
        return _finishToken(TokenKind.LBRACE);
      case TokenChar.RBRACE:
        return _finishToken(TokenKind.RBRACE);
      case TokenChar.LBRACK:
        return _finishToken(TokenKind.LBRACK);
      case TokenChar.RBRACK:
        // ']]>' closes a CDATA section and is skipped. On a partial match
        // (e.g. ']]x') nothing beyond the first ']' is consumed.
        if (_eatSequenceOrRewind([TokenChar.RBRACK, TokenChar.GREATER])) {
          return next();
        }
        return _finishToken(TokenKind.RBRACK);
      case TokenChar.HASH:
        return _finishToken(TokenKind.HASH);
      case TokenChar.PLUS:
        if (maybeEatDigit()) return finishNumber();
        return _finishToken(TokenKind.PLUS);
      case TokenChar.MINUS:
        if (selectorExpression || unicodeRange) {
          // If parsing in pseudo function expression then minus is an operator
          // not part of identifier e.g., interval value range (e.g. U+400-4ff)
          // or minus operator in selector expression.
          return _finishToken(TokenKind.MINUS);
        } else if (maybeEatDigit()) {
          return finishNumber();
        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
          return this.finishIdentifier(ch);
        }
        return _finishToken(TokenKind.MINUS);
      case TokenChar.GREATER:
        return _finishToken(TokenKind.GREATER);
      case TokenChar.TILDE:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.INCLUDES); // ~=
        }
        return _finishToken(TokenKind.TILDE);
      case TokenChar.ASTERISK:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.SUBSTRING_MATCH); // *=
        }
        return _finishToken(TokenKind.ASTERISK);
      case TokenChar.AMPERSAND:
        return _finishToken(TokenKind.AMPERSAND);
      case TokenChar.NAMESPACE:
        return _finishToken(TokenKind.NAMESPACE);
      case TokenChar.COLON:
        return _finishToken(TokenKind.COLON);
      case TokenChar.COMMA:
        return _finishToken(TokenKind.COMMA);
      case TokenChar.SEMICOLON:
        return _finishToken(TokenKind.SEMICOLON);
      case TokenChar.PERCENT:
        return _finishToken(TokenKind.PERCENT);
      case TokenChar.SINGLE_QUOTE:
        return _finishToken(TokenKind.SINGLE_QUOTE);
      case TokenChar.DOUBLE_QUOTE:
        return _finishToken(TokenKind.DOUBLE_QUOTE);
      case TokenChar.SLASH:
        if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment();
        return _finishToken(TokenKind.SLASH);
      case TokenChar.LESS:
        // '<!--' opens an HTML-style comment; '<![CDATA[' is skipped. Each
        // alternative is tried from just after the '<', and a partial match
        // consumes nothing.
        if (_eatSequenceOrRewind(
            [TokenChar.BANG, TokenChar.MINUS, TokenChar.MINUS])) {
          return finishMultiLineComment();
        }
        if (_eatSequenceOrRewind(
            [TokenChar.BANG, TokenChar.LBRACK, CDATA_NAME[0], CDATA_NAME[1],
             CDATA_NAME[2], CDATA_NAME[3], CDATA_NAME[4], TokenChar.LBRACK])) {
          // <![CDATA[
          return next();
        }
        return _finishToken(TokenKind.LESS);
      case TokenChar.EQUALS:
        return _finishToken(TokenKind.EQUALS);
      case TokenChar.OR:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.DASH_MATCH); // |=
        }
        return _finishToken(TokenKind.OR);
      case TokenChar.CARET:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.PREFIX_MATCH); // ^=
        }
        return _finishToken(TokenKind.CARET);
      case TokenChar.DOLLAR:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.SUFFIX_MATCH); // $=
        }
        return _finishToken(TokenKind.DOLLAR);
      case TokenChar.BANG:
        Token tok = finishIdentifier(ch);
        return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
      case TokenChar.BACKSLASH:
        return _finishToken(TokenKind.BACKSLASH);
      default:
        if (unicodeRange) {
          // Three types of unicode ranges:
          // - single code point (e.g. U+416)
          // - interval value range (e.g. U+400-4ff)
          // - range where trailing '?' characters imply 'any digit value'
          //   (e.g. U+4??)
          if (maybeEatHexDigit()) {
            var t = finishHexNumber();
            // Any question marks then it's a HEX_RANGE not HEX_NUMBER; the
            // range token spans the hex digits and the question marks.
            if (maybeEatQuestionMark()) return finishUnicodeRange();
            return t;
          } else if (maybeEatQuestionMark()) {
            // HEX_RANGE U+N???
            return finishUnicodeRange();
          } else {
            return _errorToken();
          }
        } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) &&
            (_peekChar() == UNICODE_PLUS)) {
          // Unicode range: U+uNumber[-U+uNumber]
          // uNumber = 0..10FFFF
          _nextChar(); // Skip +
          _startIndex = _index; // Starts at the number
          return _finishToken(TokenKind.UNICODE_RANGE);
        } else if (varDef(ch)) {
          return _finishToken(TokenKind.VAR_DEFINITION);
        } else if (varUsage(ch)) {
          return _finishToken(TokenKind.VAR_USAGE);
        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
          return finishIdentifier(ch);
        } else if (TokenizerHelpers.isDigit(ch)) {
          return finishNumber();
        }
        return _errorToken();
    }
  }

  /**
   * Tries to consume every code unit in [chars], in order, starting at the
   * current index. Returns true when all of them matched; otherwise restores
   * the index to where it was on entry and returns false.
   */
  bool _eatSequenceOrRewind(List chars) {
    var mark = _index;
    for (var c in chars) {
      if (!_maybeEatChar(c)) {
        _index = mark;
        return false;
      }
    }
    return true;
  }

  /**
   * Returns true when [ch] is 'v' and the following input is `ar-`, i.e. the
   * text at the token start is `var-`.
   *
   * NOTE(review): on a partial match the characters already eaten do not
   * appear to be restored - confirm against _maybeEatChar in TokenizerBase.
   */
  bool varDef(int ch) {
    return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) &&
        _maybeEatChar('r'.codeUnitAt(0)) && _maybeEatChar('-'.codeUnitAt(0));
  }

  /**
   * Returns true when [ch] is 'v', the following input is `ar`, and the
   * character after that is '-' (which is peeked at, not consumed).
   */
  bool varUsage(int ch) {
    return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) &&
        _maybeEatChar('r'.codeUnitAt(0)) && (_peekChar() == '-'.codeUnitAt(0));
  }

  /**
   * Finishes the current token as an ERROR token. [message] is accepted but
   * currently unused.
   */
  Token _errorToken([String message = null]) {
    return _finishToken(TokenKind.ERROR);
  }

  /**
   * Classifies the text between the token start and the current index:
   * a unit kind if [TokenKind.matchUnits] recognizes it, IMPORTANT if the text
   * is exactly `!important`, otherwise IDENTIFIER.
   */
  int getIdentifierKind() {
    // Is the identifier a unit type?
    int tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
    if (tokId == -1) {
      tokId = (_text.substring(_startIndex, _index) == '!important') ?
          TokenKind.IMPORTANT : -1;
    }

    return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
  }

  /**
   * Consumes the remaining identifier characters and returns a token whose
   * kind is given by [getIdentifierKind].
   *
   * Need to override so CSS version of isIdentifierPart is used.
   */
  Token finishIdentifier(int ch) {
    while (_index < _text.length) {
      // If parsing in pseudo function expression then minus is an operator
      // not part of identifier.
      var isIdentifier = selectorExpression
          ? TokenizerHelpers.isIdentifierPartExpr(_text.codeUnitAt(_index))
          : TokenizerHelpers.isIdentifierPart(_text.codeUnitAt(_index));
      if (!isIdentifier) break;
      _index += 1;
    }

    return _finishToken(getIdentifierKind());
  }

  /**
   * Unimplemented placeholder; always returns null. `!important` is
   * recognized by [getIdentifierKind] instead.
   */
  Token finishImportant() {
    return null;
  }

  /**
   * Consumes the rest of a number whose first digit has already been eaten.
   * Returns a DOUBLE token when the digits are followed by '.' and at least
   * one more digit, otherwise an INTEGER token.
   */
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46/*.*/) {
      // Only treat the dot as a decimal point when a digit follows it;
      // otherwise un-read it so it is scanned as a separate token.
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return _finishToken(TokenKind.DOUBLE);
      } else {
        _index -= 1;
      }
    }

    return _finishToken(TokenKind.INTEGER);
  }

  /** Consumes one decimal digit if present; returns whether it did. */
  bool maybeEatDigit() {
    if (_index < _text.length
        && TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Consumes any further hex digits and returns a HEX_INTEGER token. */
  Token finishHexNumber() {
    eatHexDigits();
    return _finishToken(TokenKind.HEX_INTEGER);
  }

  /** Advances past a (possibly empty) run of hex digits. */
  void eatHexDigits() {
    while (_index < _text.length) {
      if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
        _index += 1;
      } else {
        return;
      }
    }
  }

  /** Consumes one hex digit if present; returns whether it did. */
  bool maybeEatHexDigit() {
    if (_index < _text.length
        && TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Consumes one '?' if present; returns whether it did. */
  bool maybeEatQuestionMark() {
    if (_index < _text.length &&
        _text.codeUnitAt(_index) == QUESTION_MARK) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Advances past a (possibly empty) run of '?' characters. */
  void eatQuestionMarks() {
    while (_index < _text.length) {
      if (_text.codeUnitAt(_index) == QUESTION_MARK) {
        _index += 1;
      } else {
        return;
      }
    }
  }

  /** Consumes any further '?' characters and returns a HEX_RANGE token. */
  Token finishUnicodeRange() {
    eatQuestionMarks();
    return _finishToken(TokenKind.HEX_RANGE);
  }

  /**
   * Consumes a comment body up to and including its terminator.
   *
   * Either `*` followed by `/` or `-->` ends the comment, regardless of which
   * opener started it. When `_skipWhitespace` is set the comment is dropped
   * and the following token is returned; otherwise a COMMENT or HTML_COMMENT
   * token is returned. If character 0 is read before a terminator is found,
   * an INCOMPLETE_COMMENT token is returned.
   */
  Token finishMultiLineComment() {
    while (true) {
      int ch = _nextChar();
      // NOTE(review): 0 is presumably the end-of-file marker returned by
      // _nextChar (TokenChar.END_OF_FILE) - confirm.
      if (ch == 0) {
        return _finishToken(TokenKind.INCOMPLETE_COMMENT);
      } else if (ch == 42/*'*'*/) {
        if (_maybeEatChar(47/*'/'*/)) {
          if (_skipWhitespace) {
            return next();
          } else {
            return _finishToken(TokenKind.COMMENT);
          }
        }
      } else if (ch == TokenChar.MINUS) {
        /* Check if close part of Comment Definition --> (CDC). */
        if (_maybeEatChar(TokenChar.MINUS)) {
          if (_maybeEatChar(TokenChar.GREATER)) {
            if (_skipWhitespace) {
              return next();
            } else {
              return _finishToken(TokenKind.HTML_COMMENT);
            }
          }
        }
      }
    }
  }

}
379
/** Character-classification helpers; every method takes a code unit. */
class TokenizerHelpers {
  /** An identifier may start with an ASCII letter, '_' or '-'. */
  static bool isIdentifierStart(int c) =>
      isIdentifierStartExpr(c) || c == 45 /*-*/;

  /** True for the decimal digits '0' through '9'. */
  static bool isDigit(int c) => c >= 48/*0*/ && c <= 57/*9*/;

  /** True for decimal digits and the letters a-f in either case. */
  static bool isHexDigit(int c) {
    if (isDigit(c)) return true;
    if (c >= 97/*a*/ && c <= 102/*f*/) return true;
    return c >= 65/*A*/ && c <= 70/*F*/;
  }

  /** An identifier may continue with a letter, '_', a digit or '-'. */
  static bool isIdentifierPart(int c) =>
      isIdentifierPartExpr(c) || c == 45 /*-*/;

  /**
   * Identifier start inside a pseudo function expression: an ASCII letter or
   * '_'. The minus sign is excluded there.
   */
  static bool isIdentifierStartExpr(int c) {
    var isLower = c >= 97/*a*/ && c <= 122/*z*/;
    var isUpper = c >= 65/*A*/ && c <= 90/*Z*/;
    return isLower || isUpper || c == 95/*_*/;
  }

  /**
   * Identifier continuation inside a pseudo function expression: a letter,
   * '_' or a digit. The minus sign is excluded there.
   */
  static bool isIdentifierPartExpr(int c) =>
      isIdentifierStartExpr(c) || isDigit(c);
}
OLDNEW
« no previous file with comments | « pkg/csslib/lib/src/token.dart ('k') | pkg/csslib/lib/src/tokenizer_base.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698