Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(98)

Side by Side Diff: observatory_pub_packages/csslib/src/tokenizer.dart

Issue 816693004: Add observatory_pub_packages snapshot to third_party (Closed) Base URL: http://dart.googlecode.com/svn/third_party/
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 part of csslib.parser;
6
/**
 * CSS tokenizer. Each call to [next] scans one token from the source text,
 * starting at the current scan position.
 *
 * Low-level scanning state and primitives used here (`_index`, `_startIndex`,
 * `_text`, `_nextChar`, `_peekChar`, `_maybeEatChar`, `_finishToken`,
 * `finishWhitespace`, `eatDigits`, `inSelector`, `inSelectorExpression`, ...)
 * are not defined in this class; presumably they come from [TokenizerBase].
 */
class Tokenizer extends TokenizerBase {
  /** U+ prefix for unicode characters. */
  final UNICODE_U = 'U'.codeUnitAt(0);
  final UNICODE_LOWER_U = 'u'.codeUnitAt(0);
  final UNICODE_PLUS = '+'.codeUnitAt(0);

  /** Code unit of `?`, the wildcard digit in a unicode range (e.g. U+4??). */
  final QUESTION_MARK = '?'.codeUnitAt(0);

  /** CDATA keyword. */
  final List CDATA_NAME = 'CDATA'.codeUnits;

  /** Forwards all arguments unchanged to the [TokenizerBase] constructor. */
  Tokenizer(SourceFile file, String text, bool skipWhitespace,
      [int index = 0])
      : super(file, text, skipWhitespace, index);

  /**
   * Scans and returns the next token.
   *
   * When [unicodeRange] is true the input is tokenized as the numeric part of
   * a unicode range (the text following `U+`): a minus sign is always the
   * MINUS operator and any other character is scanned as a HEX_INTEGER or a
   * HEX_RANGE (hex digits and/or trailing `?` wildcards).
   */
  Token next({unicodeRange: false}) {
    // keep track of our starting position
    _startIndex = _index;

    int ch;
    ch = _nextChar();
    switch (ch) {
      case TokenChar.NEWLINE:
      case TokenChar.RETURN:
      case TokenChar.SPACE:
      case TokenChar.TAB:
        return finishWhitespace();
      case TokenChar.END_OF_FILE:
        return _finishToken(TokenKind.END_OF_FILE);
      case TokenChar.AT:
        int peekCh = _peekChar();
        if (TokenizerHelpers.isIdentifierStart(peekCh)) {
          // Remember where we are so the scan can be rewound if the name
          // after '@' turns out not to be a known directive.
          var oldIndex = _index;
          var oldStartIndex = _startIndex;

          _startIndex = _index;
          _nextChar();
          // Called only to advance _index past the name; the token itself is
          // not needed.
          finishIdentifier();

          // Is it a directive?
          int tokId = TokenKind.matchDirectives(_text, _startIndex,
              _index - _startIndex);
          if (tokId == -1) {
            // No, is it a margin directive?
            tokId = TokenKind.matchMarginDirectives(_text, _startIndex,
                _index - _startIndex);
          }

          if (tokId != -1) {
            return _finishToken(tokId);
          } else {
            // Didn't find a CSS directive or margin directive so the @name is
            // probably the Less definition '@name: value_variable_definition'.
            _startIndex = oldStartIndex;
            _index = oldIndex;
          }
        }
        return _finishToken(TokenKind.AT);
      case TokenChar.DOT:
        int start = _startIndex; // Start where the dot started.
        if (maybeEatDigit()) {
          // looks like a number dot followed by digit(s).
          Token number = finishNumber();
          if (number.kind == TokenKind.INTEGER) {
            // It's a number but it's preceded by a dot, so make it a double.
            _startIndex = start;
            return _finishToken(TokenKind.DOUBLE);
          } else {
            // Don't allow dot followed by a double (e.g, '..1').
            return _errorToken();
          }
        }
        // It's really a dot.
        return _finishToken(TokenKind.DOT);
      case TokenChar.LPAREN:
        return _finishToken(TokenKind.LPAREN);
      case TokenChar.RPAREN:
        return _finishToken(TokenKind.RPAREN);
      case TokenChar.LBRACE:
        return _finishToken(TokenKind.LBRACE);
      case TokenChar.RBRACE:
        return _finishToken(TokenKind.RBRACE);
      case TokenChar.LBRACK:
        return _finishToken(TokenKind.LBRACK);
      case TokenChar.RBRACK:
        if (_maybeEatChar(TokenChar.RBRACK) &&
            _maybeEatChar(TokenChar.GREATER)) {
          // ]]> (end of a CDATA section) produces no token; scan on.
          return next();
        }
        return _finishToken(TokenKind.RBRACK);
      case TokenChar.HASH:
        return _finishToken(TokenKind.HASH);
      case TokenChar.PLUS:
        if (maybeEatDigit()) return finishNumber();
        return _finishToken(TokenKind.PLUS);
      case TokenChar.MINUS:
        if (inSelectorExpression || unicodeRange) {
          // If parsing in pseudo function expression then minus is an operator
          // not part of identifier e.g., interval value range (e.g. U+400-4ff)
          // or minus operator in selector expression.
          return _finishToken(TokenKind.MINUS);
        } else if (maybeEatDigit()) {
          return finishNumber();
        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
          return finishIdentifier();
        }
        return _finishToken(TokenKind.MINUS);
      case TokenChar.GREATER:
        return _finishToken(TokenKind.GREATER);
      case TokenChar.TILDE:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.INCLUDES); // ~=
        }
        return _finishToken(TokenKind.TILDE);
      case TokenChar.ASTERISK:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.SUBSTRING_MATCH); // *=
        }
        return _finishToken(TokenKind.ASTERISK);
      case TokenChar.AMPERSAND:
        return _finishToken(TokenKind.AMPERSAND);
      case TokenChar.NAMESPACE:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.DASH_MATCH); // |=
        }
        return _finishToken(TokenKind.NAMESPACE);
      case TokenChar.COLON:
        return _finishToken(TokenKind.COLON);
      case TokenChar.COMMA:
        return _finishToken(TokenKind.COMMA);
      case TokenChar.SEMICOLON:
        return _finishToken(TokenKind.SEMICOLON);
      case TokenChar.PERCENT:
        return _finishToken(TokenKind.PERCENT);
      case TokenChar.SINGLE_QUOTE:
        return _finishToken(TokenKind.SINGLE_QUOTE);
      case TokenChar.DOUBLE_QUOTE:
        return _finishToken(TokenKind.DOUBLE_QUOTE);
      case TokenChar.SLASH:
        if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment();
        return _finishToken(TokenKind.SLASH);
      case TokenChar.LESS: // <!--
        if (_maybeEatChar(TokenChar.BANG)) {
          if (_maybeEatChar(TokenChar.MINUS) &&
              _maybeEatChar(TokenChar.MINUS)) {
            return finishMultiLineComment();
          } else if (_maybeEatChar(TokenChar.LBRACK) &&
              _maybeEatChar(CDATA_NAME[0]) &&
              _maybeEatChar(CDATA_NAME[1]) &&
              _maybeEatChar(CDATA_NAME[2]) &&
              _maybeEatChar(CDATA_NAME[3]) &&
              _maybeEatChar(CDATA_NAME[4]) &&
              _maybeEatChar(TokenChar.LBRACK)) {
            // <![CDATA[ produces no token; scan on.
            return next();
          }
        }
        return _finishToken(TokenKind.LESS);
      case TokenChar.EQUALS:
        return _finishToken(TokenKind.EQUALS);
      case TokenChar.CARET:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.PREFIX_MATCH); // ^=
        }
        return _finishToken(TokenKind.CARET);
      case TokenChar.DOLLAR:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.SUFFIX_MATCH); // $=
        }
        return _finishToken(TokenKind.DOLLAR);
      case TokenChar.BANG:
        Token tok = finishIdentifier();
        return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
      default:
        // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's
        // appropriate outside of a few specific places; certainly shouldn't
        // be parsed in selectors.
        if (!inSelector && ch == TokenChar.BACKSLASH) {
          return _finishToken(TokenKind.BACKSLASH);
        }

        if (unicodeRange) {
          // Three types of unicode ranges:
          // - single code point (e.g. U+416)
          // - interval value range (e.g. U+400-4ff)
          // - range where trailing '?' characters imply 'any digit value'
          //   (e.g. U+4??)
          if (maybeEatHexDigit()) {
            var t = finishHexNumber();
            // Any question marks then it's a HEX_RANGE not HEX_NUMBER, so
            // return the range token (which also covers the '?' characters)
            // instead of the shorter hex-integer token.
            if (maybeEatQuestionMark()) return finishUnicodeRange();
            return t;
          } else if (maybeEatQuestionMark()) {
            // HEX_RANGE U+N???
            return finishUnicodeRange();
          } else if (TokenizerHelpers.isHexDigit(ch)) {
            // The character already consumed into ch is the whole value: a
            // single-digit code point such as U+4.
            return _finishToken(TokenKind.HEX_INTEGER);
          } else if (ch == QUESTION_MARK) {
            // A lone wildcard such as U+?.
            return _finishToken(TokenKind.HEX_RANGE);
          } else {
            return _errorToken();
          }
        } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) &&
            (_peekChar() == UNICODE_PLUS)) {
          // Unicode range: U+uNumber[-U+uNumber]
          // uNumber = 0..10FFFF
          _nextChar(); // Skip +
          _startIndex = _index; // Starts at the number
          return _finishToken(TokenKind.UNICODE_RANGE);
        } else if (varDef(ch)) {
          return _finishToken(TokenKind.VAR_DEFINITION);
        } else if (varUsage(ch)) {
          return _finishToken(TokenKind.VAR_USAGE);
        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
          return finishIdentifier();
        } else if (TokenizerHelpers.isDigit(ch)) {
          return finishNumber();
        }
        return _errorToken();
    }
  }

  /**
   * Returns true if [ch] is `v` and the following characters are `ar-`, which
   * are consumed. Characters consumed by a partial match are not restored
   * here.
   */
  bool varDef(int ch) {
    return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) &&
        _maybeEatChar('r'.codeUnitAt(0)) && _maybeEatChar('-'.codeUnitAt(0));
  }

  /**
   * Returns true if [ch] is `v`, the following characters are `ar` (which are
   * consumed) and the character after that is `-` (which is only peeked).
   * Characters consumed by a partial match are not restored here.
   */
  bool varUsage(int ch) {
    return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) &&
        _maybeEatChar('r'.codeUnitAt(0)) && (_peekChar() == '-'.codeUnitAt(0));
  }

  /**
   * Finishes the current token as an ERROR token. [message] is accepted for
   * callers' convenience but is currently not used.
   */
  Token _errorToken([String message = null]) {
    return _finishToken(TokenKind.ERROR);
  }

  /**
   * Classifies the text between `_startIndex` and `_index`: a unit kind (only
   * outside selectors and selector expressions), IMPORTANT for the exact text
   * `!important`, otherwise IDENTIFIER.
   */
  int getIdentifierKind() {
    // Is the identifier a unit type?
    int tokId = -1;

    // Don't match units in selectors or selector expressions.
    if (!inSelectorExpression && !inSelector) {
      tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
    }
    if (tokId == -1) {
      tokId = (_text.substring(_startIndex, _index) == '!important') ?
          TokenKind.IMPORTANT : -1;
    }

    return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
  }

  /**
   * Scans an identifier that starts at `_startIndex` and returns it as an
   * [IdentifierToken] whose text has backslash escapes resolved.
   *
   * Characters before the `_index` value on entry were already accepted by the
   * caller and are taken without validation; from there on only identifier
   * characters (and escapes) are accepted.
   */
  Token finishIdentifier() {
    var chars = [];

    // backup so we can start with the first character
    int validateFrom = _index;
    _index = _startIndex;
    while (_index < _text.length) {
      int ch = _text.codeUnitAt(_index);

      // If the current character is "\" we need to unescape, see
      // http://www.w3.org/TR/CSS21/syndata.html#characters
      // if followed by hexadecimal digits, create the appropriate character.
      // otherwise, include the character in the identifier and don't treat it
      // specially.
      if (ch == 92/*\*/) {
        int startHex = ++_index;
        // An escape has at most 6 hex digits.
        eatHexDigits(startHex + 6);
        if (_index != startHex) {
          // Parse the hex digits and add that character.
          int codePoint = int.parse('0x' + _text.substring(startHex, _index));
          // Values above U+10FFFF are not Unicode code points and would make
          // String.fromCharCodes throw below; CSS 2.1 allows replacing such
          // an escape with U+FFFD (the replacement character).
          chars.add(codePoint > 0x10FFFF ? 0xFFFD : codePoint);

          if (_index == _text.length) break;

          // if we stopped the hex because of a whitespace char, skip it
          ch = _text.codeUnitAt(_index);
          if (_index - startHex != 6 &&
              (ch == TokenChar.SPACE || ch == TokenChar.TAB ||
              ch == TokenChar.RETURN || ch == TokenChar.NEWLINE)) {
            _index++;
          }
        } else {
          // not a digit, just add the next character literally
          if (_index == _text.length) break;
          chars.add(_text.codeUnitAt(_index++));
        }
      } else if (_index < validateFrom || (inSelectorExpression
          ? TokenizerHelpers.isIdentifierPartExpr(ch)
          : TokenizerHelpers.isIdentifierPart(ch))) {
        chars.add(ch);
        _index++;
      } else {
        // Not an identifier or escaped character.
        break;
      }
    }

    var span = _file.span(_startIndex, _index);
    var text = new String.fromCharCodes(chars);

    return new IdentifierToken(text, getIdentifierKind(), span);
  }

  /**
   * Scans the rest of a number whose first digit was already consumed.
   * Returns a DOUBLE token if a `.` followed by at least one digit is found,
   * otherwise an INTEGER token.
   */
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46/*.*/) {
      // Only treat the dot as part of the number if a digit follows it.
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return _finishToken(TokenKind.DOUBLE);
      } else {
        // Not a fraction: give the dot back.
        _index -= 1;
      }
    }

    return _finishToken(TokenKind.INTEGER);
  }

  /** Consumes the next character if it is a decimal digit. */
  bool maybeEatDigit() {
    if (_index < _text.length
        && TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Consumes all following hex digits and returns a HEX_INTEGER token. */
  Token finishHexNumber() {
    eatHexDigits(_text.length);
    return _finishToken(TokenKind.HEX_INTEGER);
  }

  /**
   * Consumes consecutive hex digits, stopping at the first non-hex character
   * or at index [end] (clamped to the length of the text), whichever is first.
   */
  void eatHexDigits(int end) {
    end = math.min(end, _text.length);
    while (_index < end) {
      if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
        _index += 1;
      } else {
        return;
      }
    }
  }

  /** Consumes the next character if it is a hexadecimal digit. */
  bool maybeEatHexDigit() {
    if (_index < _text.length
        && TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Consumes the next character if it is a question mark. */
  bool maybeEatQuestionMark() {
    if (_index < _text.length &&
        _text.codeUnitAt(_index) == QUESTION_MARK) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Consumes all consecutive question marks at the current position. */
  void eatQuestionMarks() {
    while (_index < _text.length) {
      if (_text.codeUnitAt(_index) == QUESTION_MARK) {
        _index += 1;
      } else {
        return;
      }
    }
  }

  /** Consumes any remaining `?` wildcards and returns a HEX_RANGE token. */
  Token finishUnicodeRange() {
    eatQuestionMarks();
    return _finishToken(TokenKind.HEX_RANGE);
  }

  /**
   * Scans to the end of a comment whose opener (`/*` or `<!--`) was already
   * consumed. Either terminator (`*/` or `-->`) ends the scan regardless of
   * which opener was used.
   *
   * Returns INCOMPLETE_COMMENT if the input ends first. Otherwise, when
   * `_skipWhitespace` is set the comment is dropped and the following token is
   * returned; when it is not, a COMMENT (for `*/`) or HTML_COMMENT (for `-->`)
   * token is returned.
   */
  Token finishMultiLineComment() {
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _finishToken(TokenKind.INCOMPLETE_COMMENT);
      } else if (ch == 42/*'*'*/) {
        if (_maybeEatChar(47/*'/'*/)) {
          if (_skipWhitespace) {
            return next();
          } else {
            return _finishToken(TokenKind.COMMENT);
          }
        }
      } else if (ch == TokenChar.MINUS) {
        /* Check if close part of Comment Definition --> (CDC). */
        if (_maybeEatChar(TokenChar.MINUS)) {
          if (_maybeEatChar(TokenChar.GREATER)) {
            if (_skipWhitespace) {
              return next();
            } else {
              return _finishToken(TokenKind.HTML_COMMENT);
            }
          }
        }
      }
    }
  }

}
416
417 /** Static helper methods. */
/** Character-classification predicates over code units, all static. */
class TokenizerHelpers {
  /** True if [c] may begin an identifier; a minus sign is accepted too. */
  static bool isIdentifierStart(int c) =>
      c == 0x2D /*-*/ || isIdentifierStartExpr(c);

  /** True if [c] is an ASCII decimal digit, `0` through `9`. */
  static bool isDigit(int c) => c >= 0x30 /*0*/ && c <= 0x39 /*9*/;

  /** True if [c] is an ASCII hexadecimal digit in either letter case. */
  static bool isHexDigit(int c) {
    if (isDigit(c)) return true;
    if (c >= 0x61 /*a*/ && c <= 0x66 /*f*/) return true;
    return c >= 0x41 /*A*/ && c <= 0x46 /*F*/;
  }

  /** True if [c] may continue an identifier; a minus sign is accepted too. */
  static bool isIdentifierPart(int c) =>
      c == 0x2D /*-*/ || isIdentifierPartExpr(c);

  /**
   * Identifier-start test for pseudo function expressions, where a minus sign
   * is not part of an identifier.
   *
   * Accepts ASCII letters, underscore, backslash (so an escaped character can
   * begin an identifier) and any value of U+00A0 or above, see:
   * http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
   * http://www.w3.org/TR/CSS21/syndata.html#characters
   */
  static bool isIdentifierStartExpr(int c) {
    if (c >= 0x61 /*a*/ && c <= 0x7A /*z*/) return true;
    if (c >= 0x41 /*A*/ && c <= 0x5A /*Z*/) return true;
    return c == 0x5F /*_*/ || c == 0x5C /*\*/ || c >= 0xA0;
  }

  /**
   * Identifier-part test for pseudo function expressions, where a minus sign
   * is not part of an identifier: an identifier-start character or a digit.
   */
  static bool isIdentifierPartExpr(int c) =>
      isDigit(c) || isIdentifierStartExpr(c);
}
OLDNEW
« no previous file with comments | « observatory_pub_packages/csslib/src/token.dart ('k') | observatory_pub_packages/csslib/src/tokenizer_base.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698