Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(201)

Side by Side Diff: csslib/lib/src/tokenizer.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « csslib/lib/src/token.dart ('k') | csslib/lib/src/tokenizer_base.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 part of csslib.parser;
6
/**
 * Tokenizer for CSS source text.
 *
 * Adds CSS-specific token recognition on top of [TokenizerBase]. The scanning
 * state used throughout (`_text`, `_index`, `_startIndex`, `_inString`,
 * `inSelector`, `inSelectorExpression`) and the primitives `_nextChar`,
 * `_peekChar`, `_maybeEatChar`, `_finishToken`, `finishWhitespace` and
 * `eatDigits` are not declared in this class; they are presumably inherited
 * from [TokenizerBase].
 */
class Tokenizer extends TokenizerBase {
  /** U+ prefix for unicode characters. */
  final UNICODE_U = 'U'.codeUnitAt(0);
  final UNICODE_LOWER_U = 'u'.codeUnitAt(0);
  final UNICODE_PLUS = '+'.codeUnitAt(0);

  /** Code unit of '?', the wildcard digit in a unicode range (e.g. U+4??). */
  final QUESTION_MARK = '?'.codeUnitAt(0);

  /** CDATA keyword, as code units, used to recognize `<![CDATA[`. */
  final List<int> CDATA_NAME = 'CDATA'.codeUnits;

  /**
   * Creates a tokenizer; all arguments are forwarded unchanged to the
   * [TokenizerBase] constructor.
   */
  Tokenizer(SourceFile file, String text, bool skipWhitespace, [int index = 0])
      : super(file, text, skipWhitespace, index);

  /**
   * Scans and returns the next token, starting at the current position.
   *
   * When [unicodeRange] is true the scanner treats the input as the value of
   * a unicode range: a minus sign is always returned as [TokenKind.MINUS],
   * and anything that reaches the default branch is scanned as hex digits
   * and/or question marks (or yields an error token).
   */
  Token next({unicodeRange: false}) {
    // keep track of our starting position
    _startIndex = _index;

    int ch = _nextChar();
    switch (ch) {
      case TokenChar.NEWLINE:
      case TokenChar.RETURN:
      case TokenChar.SPACE:
      case TokenChar.TAB:
        return finishWhitespace();
      case TokenChar.END_OF_FILE:
        return _finishToken(TokenKind.END_OF_FILE);
      case TokenChar.AT:
        int peekCh = _peekChar();
        if (TokenizerHelpers.isIdentifierStart(peekCh)) {
          // Remember where we were so the scan can be undone if the name
          // after '@' turns out not to be a known directive.
          var oldIndex = _index;
          var oldStartIndex = _startIndex;

          _startIndex = _index;
          ch = _nextChar();
          finishIdentifier();

          // Is it a directive?
          int tokId = TokenKind.matchDirectives(
              _text, _startIndex, _index - _startIndex);
          if (tokId == -1) {
            // No, is it a margin directive?
            tokId = TokenKind.matchMarginDirectives(
                _text, _startIndex, _index - _startIndex);
          }

          if (tokId != -1) {
            return _finishToken(tokId);
          } else {
            // Didn't find a CSS directive or margin directive so the @name is
            // probably the Less definition '@name: value_variable_definition'.
            _startIndex = oldStartIndex;
            _index = oldIndex;
          }
        }
        return _finishToken(TokenKind.AT);
      case TokenChar.DOT:
        int start = _startIndex; // Start where the dot started.
        if (maybeEatDigit()) {
          // looks like a number dot followed by digit(s).
          Token number = finishNumber();
          if (number.kind == TokenKind.INTEGER) {
            // It's a number but it's preceded by a dot, so make it a double.
            _startIndex = start;
            return _finishToken(TokenKind.DOUBLE);
          } else {
            // Don't allow dot followed by a double (e.g, '..1').
            return _errorToken();
          }
        }
        // It's really a dot.
        return _finishToken(TokenKind.DOT);
      case TokenChar.LPAREN:
        return _finishToken(TokenKind.LPAREN);
      case TokenChar.RPAREN:
        return _finishToken(TokenKind.RPAREN);
      case TokenChar.LBRACE:
        return _finishToken(TokenKind.LBRACE);
      case TokenChar.RBRACE:
        return _finishToken(TokenKind.RBRACE);
      case TokenChar.LBRACK:
        return _finishToken(TokenKind.LBRACK);
      case TokenChar.RBRACK:
        if (_maybeEatChar(TokenChar.RBRACK) &&
            _maybeEatChar(TokenChar.GREATER)) {
          // ]]> closes a CDATA section: skip it and return what follows.
          return next();
        }
        return _finishToken(TokenKind.RBRACK);
      case TokenChar.HASH:
        return _finishToken(TokenKind.HASH);
      case TokenChar.PLUS:
        if (maybeEatDigit()) return finishNumber();
        return _finishToken(TokenKind.PLUS);
      case TokenChar.MINUS:
        if (inSelectorExpression || unicodeRange) {
          // If parsing in pseudo function expression then minus is an operator
          // not part of identifier e.g., interval value range (e.g. U+400-4ff)
          // or minus operator in selector expression.
          return _finishToken(TokenKind.MINUS);
        } else if (maybeEatDigit()) {
          return finishNumber();
        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
          // NOTE(review): isIdentifierStart accepts code unit 45 ('-'), so if
          // TokenChar.MINUS is 45 this branch is always taken and the final
          // return below is never reached -- confirm.
          return finishIdentifier();
        }
        return _finishToken(TokenKind.MINUS);
      case TokenChar.GREATER:
        return _finishToken(TokenKind.GREATER);
      case TokenChar.TILDE:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.INCLUDES); // ~=
        }
        return _finishToken(TokenKind.TILDE);
      case TokenChar.ASTERISK:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.SUBSTRING_MATCH); // *=
        }
        return _finishToken(TokenKind.ASTERISK);
      case TokenChar.AMPERSAND:
        return _finishToken(TokenKind.AMPERSAND);
      case TokenChar.NAMESPACE:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.DASH_MATCH); // |=
        }
        return _finishToken(TokenKind.NAMESPACE);
      case TokenChar.COLON:
        return _finishToken(TokenKind.COLON);
      case TokenChar.COMMA:
        return _finishToken(TokenKind.COMMA);
      case TokenChar.SEMICOLON:
        return _finishToken(TokenKind.SEMICOLON);
      case TokenChar.PERCENT:
        return _finishToken(TokenKind.PERCENT);
      case TokenChar.SINGLE_QUOTE:
        return _finishToken(TokenKind.SINGLE_QUOTE);
      case TokenChar.DOUBLE_QUOTE:
        return _finishToken(TokenKind.DOUBLE_QUOTE);
      case TokenChar.SLASH:
        if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment();
        return _finishToken(TokenKind.SLASH);
      case TokenChar.LESS: // <!--
        if (_maybeEatChar(TokenChar.BANG)) {
          if (_maybeEatChar(TokenChar.MINUS) &&
              _maybeEatChar(TokenChar.MINUS)) {
            return finishMultiLineComment();
          } else if (_maybeEatChar(TokenChar.LBRACK) &&
              _maybeEatChar(CDATA_NAME[0]) &&
              _maybeEatChar(CDATA_NAME[1]) &&
              _maybeEatChar(CDATA_NAME[2]) &&
              _maybeEatChar(CDATA_NAME[3]) &&
              _maybeEatChar(CDATA_NAME[4]) &&
              _maybeEatChar(TokenChar.LBRACK)) {
            // <![CDATA[ opens a CDATA section: skip it and return what follows.
            return next();
          }
        }
        return _finishToken(TokenKind.LESS);
      case TokenChar.EQUALS:
        return _finishToken(TokenKind.EQUALS);
      case TokenChar.CARET:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.PREFIX_MATCH); // ^=
        }
        return _finishToken(TokenKind.CARET);
      case TokenChar.DOLLAR:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.SUFFIX_MATCH); // $=
        }
        return _finishToken(TokenKind.DOLLAR);
      case TokenChar.BANG:
        // Scan '!' together with what follows as one identifier so that
        // getIdentifierKind can recognize '!important'.
        Token tok = finishIdentifier();
        return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
      default:
        // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's
        // appropriate outside of a few specific places; certainly shouldn't
        // be parsed in selectors.
        if (!inSelector && ch == TokenChar.BACKSLASH) {
          return _finishToken(TokenKind.BACKSLASH);
        }

        if (unicodeRange) {
          // Three types of unicode ranges:
          // - single code point (e.g. U+416)
          // - interval value range (e.g. U+400-4ff)
          // - range where trailing '?' characters imply 'any digit value'
          //   (e.g. U+4??)
          if (maybeEatHexDigit()) {
            var t = finishHexNumber();
            // Any question marks then it's a HEX_RANGE not HEX_NUMBER.
            // NOTE(review): the HEX_RANGE token created by finishUnicodeRange
            // is discarded here; the HEX_INTEGER token `t` is what gets
            // returned and only the scan position moves past the '?'s.
            // Confirm this matches what the parser expects.
            if (maybeEatQuestionMark()) finishUnicodeRange();
            return t;
          } else if (maybeEatQuestionMark()) {
            // HEX_RANGE U+N???
            return finishUnicodeRange();
          } else {
            return _errorToken();
          }
        } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) &&
            (_peekChar() == UNICODE_PLUS)) {
          // Unicode range: U+uNumber[-U+uNumber]
          // uNumber = 0..10FFFF
          _nextChar(); // Skip +
          _startIndex = _index; // Starts at the number
          return _finishToken(TokenKind.UNICODE_RANGE);
        } else if (varDef(ch)) {
          return _finishToken(TokenKind.VAR_DEFINITION);
        } else if (varUsage(ch)) {
          return _finishToken(TokenKind.VAR_USAGE);
        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
          // finishIdentifier rewinds to _startIndex, so anything the failed
          // varDef/varUsage probes above consumed is rescanned here.
          return finishIdentifier();
        } else if (TokenizerHelpers.isDigit(ch)) {
          return finishNumber();
        }
        return _errorToken();
    }
  }

  /**
   * Returns true when [ch] is 'v' and the following characters are "ar-",
   * i.e. the input starts with `var-`.
   *
   * NOTE(review): the probes are chained with `&&`, so when a later one fails
   * the characters accepted by the earlier `_maybeEatChar` calls presumably
   * stay consumed -- verify against `_maybeEatChar` in TokenizerBase.
   */
  bool varDef(int ch) {
    return ch == 'v'.codeUnitAt(0) &&
        _maybeEatChar('a'.codeUnitAt(0)) &&
        _maybeEatChar('r'.codeUnitAt(0)) &&
        _maybeEatChar('-'.codeUnitAt(0));
  }

  /**
   * Returns true when [ch] is 'v', the following characters are "ar", and the
   * character after that is '-'. Unlike [varDef] the '-' is only peeked at,
   * not consumed.
   */
  bool varUsage(int ch) {
    return ch == 'v'.codeUnitAt(0) &&
        _maybeEatChar('a'.codeUnitAt(0)) &&
        _maybeEatChar('r'.codeUnitAt(0)) &&
        (_peekChar() == '-'.codeUnitAt(0));
  }

  /**
   * Finishes the current token as [TokenKind.ERROR].
   *
   * [message] is accepted for interface compatibility but is not used.
   */
  Token _errorToken([String message = null]) {
    return _finishToken(TokenKind.ERROR);
  }

  /**
   * Classifies the text between `_startIndex` and `_index`.
   *
   * Returns a unit token kind when the text matches a unit (never inside a
   * selector or selector expression), [TokenKind.IMPORTANT] for the exact
   * text `!important`, and [TokenKind.IDENTIFIER] otherwise.
   */
  int getIdentifierKind() {
    // Is the identifier a unit type?
    int tokId = -1;

    // Don't match units in selectors or selector expressions.
    if (!inSelectorExpression && !inSelector) {
      tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
    }
    if (tokId == -1 && _text.substring(_startIndex, _index) == '!important') {
      tokId = TokenKind.IMPORTANT;
    }

    return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
  }

  /**
   * Scans an identifier beginning at `_startIndex` and returns it as an
   * [IdentifierToken] whose text has backslash escapes resolved.
   *
   * Characters before the position the scanner had already reached on entry
   * are accepted without validation; later characters must satisfy the
   * identifier-part predicate for the current mode. Backslash escapes are
   * only interpreted while `_inString` is set.
   */
  Token finishIdentifier() {
    // Code units of the identifier with escapes resolved. Typed as <int> so
    // String.fromCharCodes receives an Iterable<int> rather than a
    // List<dynamic>.
    var chars = <int>[];

    // backup so we can start with the first character
    int validateFrom = _index;
    _index = _startIndex;
    while (_index < _text.length) {
      int ch = _text.codeUnitAt(_index);

      // If the current character is "\" we need to process the escape.
      // http://www.w3.org/TR/CSS21/syndata.html#characters
      // if followed by hexadecimal digits, create the appropriate character.
      // otherwise, include the character in the identifier and don't treat it
      // specially.
      if (ch == 92 /*\*/ && _inString) {
        int startHex = ++_index;
        eatHexDigits(startHex + 6); // An escape has at most 6 hex digits.
        if (_index != startHex) {
          // Parse the hex digits and add that character.
          chars.add(int.parse(_text.substring(startHex, _index), radix: 16));

          if (_index == _text.length) break;

          // if we stopped the hex because of a whitespace char, skip it
          ch = _text.codeUnitAt(_index);
          if (_index - startHex != 6 &&
              (ch == TokenChar.SPACE ||
                  ch == TokenChar.TAB ||
                  ch == TokenChar.RETURN ||
                  ch == TokenChar.NEWLINE)) {
            _index++;
          }
        } else {
          // not a digit, just add the next character literally
          if (_index == _text.length) break;
          chars.add(_text.codeUnitAt(_index++));
        }
      } else if (_index < validateFrom ||
          (inSelectorExpression
              ? TokenizerHelpers.isIdentifierPartExpr(ch)
              : TokenizerHelpers.isIdentifierPart(ch))) {
        chars.add(ch);
        _index++;
      } else {
        // Not an identifier or escaped character.
        break;
      }
    }

    var span = _file.span(_startIndex, _index);
    var text = new String.fromCharCodes(chars);

    return new IdentifierToken(text, getIdentifierKind(), span);
  }

  /**
   * Scans the rest of a number whose first digit was already consumed.
   *
   * Returns a [TokenKind.DOUBLE] token when the digits are followed by '.'
   * and at least one more digit, otherwise a [TokenKind.INTEGER] token.
   */
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46 /*.*/) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return _finishToken(TokenKind.DOUBLE);
      } else {
        // The '.' is not part of the number; give it back.
        _index -= 1;
      }
    }

    return _finishToken(TokenKind.INTEGER);
  }

  /** Consumes one decimal digit if present; returns whether it did. */
  bool maybeEatDigit() {
    if (_index < _text.length &&
        TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /**
   * Consumes all following hex digits and finishes the token as
   * [TokenKind.HEX_INTEGER].
   */
  Token finishHexNumber() {
    eatHexDigits(_text.length);
    return _finishToken(TokenKind.HEX_INTEGER);
  }

  /**
   * Advances over consecutive hex digits, stopping at the first non-hex
   * character or at index [end] (clamped to the text length).
   */
  void eatHexDigits(int end) {
    end = math.min(end, _text.length);
    while (_index < end) {
      if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
        _index += 1;
      } else {
        return;
      }
    }
  }

  /** Consumes one hex digit if present; returns whether it did. */
  bool maybeEatHexDigit() {
    if (_index < _text.length &&
        TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Consumes one '?' if present; returns whether it did. */
  bool maybeEatQuestionMark() {
    if (_index < _text.length && _text.codeUnitAt(_index) == QUESTION_MARK) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Advances over every consecutive '?' at the current position. */
  void eatQuestionMarks() {
    while (_index < _text.length) {
      if (_text.codeUnitAt(_index) == QUESTION_MARK) {
        _index += 1;
      } else {
        return;
      }
    }
  }

  /**
   * Consumes any remaining '?' characters and finishes the token as
   * [TokenKind.HEX_RANGE].
   */
  Token finishUnicodeRange() {
    eatQuestionMarks();
    return _finishToken(TokenKind.HEX_RANGE);
  }

  /**
   * Scans to the end of a comment whose opener was already consumed.
   *
   * Either terminator ends the scan regardless of which opener started it:
   * `*` followed by `/` yields [TokenKind.COMMENT], and `-->` yields
   * [TokenKind.HTML_COMMENT]. When `_inString` is set the comment is skipped
   * and the token after it is returned instead. If `_nextChar` returns 0
   * (presumably end of input) before a terminator is found, the result is
   * [TokenKind.INCOMPLETE_COMMENT].
   */
  Token finishMultiLineComment() {
    // Every path out of this loop is a return.
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _finishToken(TokenKind.INCOMPLETE_COMMENT);
      } else if (ch == 42 /*'*'*/) {
        if (_maybeEatChar(47 /*'/'*/)) {
          if (_inString) {
            return next();
          } else {
            return _finishToken(TokenKind.COMMENT);
          }
        }
      } else if (ch == TokenChar.MINUS) {
        /* Check if close part of Comment Definition --> (CDC). */
        if (_maybeEatChar(TokenChar.MINUS)) {
          if (_maybeEatChar(TokenChar.GREATER)) {
            if (_inString) {
              return next();
            } else {
              return _finishToken(TokenKind.HTML_COMMENT);
            }
          }
        }
      }
    }
  }
}
420
/**
 * Static character-classification helpers.
 *
 * Every method takes a single code unit and is a pure predicate.
 */
class TokenizerHelpers {
  /** True when [c] may start an identifier; '-' is allowed here. */
  static bool isIdentifierStart(int c) =>
      c == 45 /*-*/ || isIdentifierStartExpr(c);

  /** True when [c] is an ASCII decimal digit, '0' through '9'. */
  static bool isDigit(int c) => !(c < 48 /*0*/ || c > 57 /*9*/);

  /** True when [c] is a hexadecimal digit: 0-9, a-f or A-F. */
  static bool isHexDigit(int c) {
    if (isDigit(c)) return true;
    final bool lowerHex = c >= 97 /*a*/ && c <= 102 /*f*/;
    final bool upperHex = c >= 65 /*A*/ && c <= 70 /*F*/;
    return lowerHex || upperHex;
  }

  /** True when [c] may continue an identifier; '-' is allowed here. */
  static bool isIdentifierPart(int c) =>
      c == 45 /*-*/ || isIdentifierPartExpr(c);

  /**
   * True when [c] may start an identifier inside a pseudo function
   * expression, where identifiers can't have a minus sign.
   *
   * Accepts ASCII letters, '_', '\' (so that an escaped character is
   * allowed) and every code unit from U+00A0 upwards; see
   *   http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
   *   http://www.w3.org/TR/CSS21/syndata.html#characters
   */
  static bool isIdentifierStartExpr(int c) {
    final bool lowerCase = c >= 97 /*a*/ && c <= 122 /*z*/;
    if (lowerCase) return true;
    final bool upperCase = c >= 65 /*A*/ && c <= 90 /*Z*/;
    if (upperCase) return true;
    if (c == 95 /*_*/) return true;
    if (c >= 0xA0) return true;
    return c == 92 /*\*/;
  }

  /**
   * True when [c] may continue an identifier inside a pseudo function
   * expression, where identifiers can't have a minus sign.
   */
  static bool isIdentifierPartExpr(int c) {
    if (isIdentifierStartExpr(c)) return true;
    return isDigit(c);
  }
}
OLDNEW
« no previous file with comments | « csslib/lib/src/token.dart ('k') | csslib/lib/src/tokenizer_base.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698