Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(213)

Side by Side Diff: observatory_pub_packages/csslib/src/tokenizer_base.dart

Issue 816693004: Add observatory_pub_packages snapshot to third_party (Closed) Base URL: http://dart.googlecode.com/svn/third_party/
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4 // Generated by scripts/tokenizer_gen.py.
5
6 part of csslib.parser;
7
/**
 * Immutable snapshot of a [TokenizerBase]'s scanning state, taken to support
 * look ahead for Less' nested selectors.
 *
 * The snapshot copies the two position counters and the two selector flags
 * from the tokenizer it is built from; nothing else is captured.
 */
class TokenizerState {
  /** The tokenizer's `_index` at the time of the snapshot. */
  final int index;

  /** The tokenizer's `_startIndex` at the time of the snapshot. */
  final int startIndex;

  /** The tokenizer's `inSelectorExpression` flag at the time of the snapshot. */
  final bool inSelectorExpression;

  /** The tokenizer's `inSelector` flag at the time of the snapshot. */
  final bool inSelector;

  /** Copies the current position and selector flags out of [base]. */
  TokenizerState(TokenizerBase base)
      : index = base._index,
        startIndex = base._startIndex,
        inSelector = base.inSelector,
        inSelectorExpression = base.inSelectorExpression;
}
21
/**
 * The base class for our tokenizer. The hand coded parts are in this file, with
 * the generated parts in the subclass Tokenizer.
 *
 * The tokenizer walks [_text] by UTF-16 code unit. [_index] is the position of
 * the next unread code unit and [_startIndex] is used as the start of the span
 * of every token built here. A code unit value of 0 is returned by the
 * character readers once the end of the text has been reached.
 */
abstract class TokenizerBase {
  /** Source file used to build the span attached to every token. */
  final SourceFile _file;

  /** The text being tokenized. */
  final String _text;

  /**
   * When true, whitespace and comments are not returned as tokens; the
   * tokenizer moves straight on to the following token instead.
   */
  bool _skipWhitespace;

  /**
   * Changes tokenization when in a pseudo function expression.  If true then
   * minus signs are handled as operators instead of identifiers.
   */
  bool inSelectorExpression = false;

  /**
   * Changes tokenization when in selectors. If true, it prevents identifiers
   * from being treated as units. This would break things like ":lang(fr)" or
   * the HTML (unknown) tag name "px", which is legal to use in a selector.
   */
  // TODO(jmesserly): is this a problem elsewhere? "fr" for example will be
  // processed as a "fraction" unit token, preventing it from working in
  // places where an identifier is expected. This was breaking selectors like:
  //     :lang(fr)
  // The assumption that "fr" always means fraction (and similar issue with
  // other units) doesn't seem valid. We probably should defer this
  // analysis until we reach places in the parser where units are expected.
  // I'm not sure this is tokenizing as described in the specs:
  //     http://dev.w3.org/csswg/css-syntax/
  //     http://dev.w3.org/csswg/selectors4/
  bool inSelector = false;

  /** Position in [_text] of the next code unit to read. */
  int _index;

  /** Start position used for the span of the token being built. */
  int _startIndex;

  static const String _CDATA_START = '<![CDATA[';
  static const String _CDATA_END = ']]>';

  /**
   * Creates a tokenizer over [_text]. Scanning starts at the optional
   * [_index], which defaults to 0.
   */
  TokenizerBase(this._file, this._text, this._skipWhitespace,
      [this._index = 0]);

  /** Returns the next token; implemented by the subclass. */
  Token next();

  /** Implemented by the subclass. */
  int getIdentifierKind();

  /** Snapshot of Tokenizer scanning state. */
  TokenizerState get mark => new TokenizerState(this);

  /** Restore Tokenizer scanning state. */
  void restore(TokenizerState markedData) {
    _index = markedData.index;
    _startIndex = markedData.startIndex;
    inSelectorExpression = markedData.inSelectorExpression;
    inSelector = markedData.inSelector;
  }

  /**
   * Returns the code unit at the current position and advances past it, or
   * returns 0 (without advancing) when the end of the text has been reached.
   */
  int _nextChar() {
    return _index < _text.length ? _text.codeUnitAt(_index++) : 0;
  }

  /**
   * Returns the code unit at the current position without advancing, or 0
   * when the end of the text has been reached.
   */
  int _peekChar() {
    return _index < _text.length ? _text.codeUnitAt(_index) : 0;
  }

  /**
   * Advances past the current code unit and returns true if it equals [ch];
   * otherwise leaves the position untouched and returns false.
   */
  bool _maybeEatChar(int ch) {
    if (_index < _text.length && _text.codeUnitAt(_index) == ch) {
      _index++;
      return true;
    }
    return false;
  }

  /**
   * Returns the text from [_startIndex] up to the current position, clamped
   * to the end of the text.
   */
  String _tokenText() {
    final end = _index < _text.length ? _index : _text.length;
    return _text.substring(_startIndex, end);
  }

  /** Builds a token of [kind] spanning [_startIndex] to the current position. */
  Token _finishToken(int kind) {
    return new Token(kind, _file.span(_startIndex, _index));
  }

  /**
   * Builds an error token spanning [_startIndex] to the current position,
   * carrying the optional [message].
   */
  Token _errorToken([String message]) {
    return new ErrorToken(
        TokenKind.ERROR, _file.span(_startIndex, _index), message);
  }

  /**
   * Consumes a run of spaces, tabs, carriage returns and newlines.
   *
   * When whitespace is not being skipped, a WHITESPACE token is returned at
   * the first newline or at the first non-whitespace character. When
   * whitespace is being skipped, the result of [next] is returned once a
   * non-whitespace character is found. Reaching the end of the text yields
   * an END_OF_FILE token.
   */
  Token finishWhitespace() {
    // Step back one position so the loop re-reads the previous character
    // (presumably the whitespace character the caller just consumed).
    _index--;
    while (_index < _text.length) {
      final ch = _text.codeUnitAt(_index++);
      if (ch == TokenChar.SPACE ||
          ch == TokenChar.TAB ||
          ch == TokenChar.RETURN) {
        continue;
      }
      if (ch == TokenChar.NEWLINE) {
        if (!_skipWhitespace) {
          return _finishToken(TokenKind.WHITESPACE); // note the newline?
        }
        continue;
      }

      // Not whitespace: un-read the character so it starts the next token.
      _index--;
      return _skipWhitespace ? next() : _finishToken(TokenKind.WHITESPACE);
    }
    return _finishToken(TokenKind.END_OF_FILE);
  }

  /**
   * Consumes the remainder of a multi-line comment, counting nested comment
   * openers so that each one needs its own terminator.
   *
   * Returns an error token if the text ends before the comment is closed.
   * Otherwise returns the result of [next] when whitespace is being skipped,
   * or a COMMENT token when it is not.
   */
  Token finishMultiLineComment() {
    int nesting = 1;
    do {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.ASTERISK) {
        if (_maybeEatChar(TokenChar.SLASH)) {
          nesting--;
        }
      } else if (ch == TokenChar.SLASH) {
        if (_maybeEatChar(TokenChar.ASTERISK)) {
          nesting++;
        }
      }
    } while (nesting > 0);

    return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
  }

  /** Advances past every consecutive digit at the current position. */
  void eatDigits() {
    while (_index < _text.length &&
        TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
      _index++;
    }
  }

  /**
   * Returns the numeric value (0-15) of the hexadecimal digit whose code
   * unit is [c], or -1 if [c] is not a hexadecimal digit.
   */
  static int _hexDigit(int c) {
    if (c >= 48/*0*/ && c <= 57/*9*/) {
      return c - 48;
    } else if (c >= 97/*a*/ && c <= 102/*f*/) {
      return c - 87;
    } else if (c >= 65/*A*/ && c <= 70/*F*/) {
      return c - 55;
    } else {
      return -1;
    }
  }

  /**
   * Reads a hexadecimal number starting at the current position.
   *
   * With [hexLength] given, exactly that many hex digits must follow; -1 is
   * returned if fewer characters remain in the text or if any of them is
   * not a hex digit. Without [hexLength], digits are read until the first
   * non-hex character or the end of the text, and the value accumulated so
   * far is returned (0 if there were no digits).
   */
  int readHex([int hexLength]) {
    int maxIndex;
    if (hexLength == null) {
      // Unbounded: every remaining character is a candidate digit.
      maxIndex = _text.length;
    } else {
      // maxIndex is exclusive, so a run that ends exactly at the end of the
      // text is still readable; only a run extending past it is rejected.
      maxIndex = _index + hexLength;
      if (maxIndex > _text.length) return -1;
    }

    var result = 0;
    while (_index < maxIndex) {
      final digit = _hexDigit(_text.codeUnitAt(_index));
      if (digit == -1) {
        return hexLength == null ? result : -1;
      }
      // Multiply by 16 rather than shift by 4 since that will result in a
      // correct value for numbers that exceed the 32 bit precision of JS
      // 'integers'.
      // TODO: Figure out a better solution to integer truncation. Issue 638.
      result = (result * 16) + digit;
      _index++;
    }

    return result;
  }

  /**
   * Finishes a number whose leading digit has been seen: consumes the
   * remaining integer digits, then a fractional part if a dot is directly
   * followed by a digit, and hands over to [finishNumberExtra].
   */
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == TokenChar.DOT) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return finishNumberExtra(TokenKind.DOUBLE);
      }
      // The dot is not part of the number; un-read it.
      _index--;
    }

    return finishNumberExtra(TokenKind.INTEGER);
  }

  /**
   * Consumes an optional exponent (`e` or `E`, an optional single sign, then
   * digits), which turns the token into a DOUBLE regardless of [kind].
   *
   * Returns an error token if the number is directly followed by an
   * identifier start character, otherwise a token of the resulting kind.
   */
  Token finishNumberExtra(int kind) {
    if (_maybeEatChar(101/*e*/) || _maybeEatChar(69/*E*/)) {
      kind = TokenKind.DOUBLE;
      // _maybeEatChar compares against character codes, so the sign has to
      // be given as a code unit rather than as a TokenKind token id.
      if (!_maybeEatChar(45/*-*/)) {
        _maybeEatChar(43/*+*/);
      }
      eatDigits();
    }
    if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
      _nextChar();
      return _errorToken("illegal character in number");
    }

    return _finishToken(kind);
  }

  /**
   * Builds a literal token whose value is the string made from the code
   * units in [buf]. The kind is STRING_PART when [isPart] is true, STRING
   * otherwise.
   */
  Token _makeStringToken(List<int> buf, bool isPart) {
    final s = new String.fromCharCodes(buf);
    final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
    return new LiteralToken(kind, _file.span(_startIndex, _index), s);
  }

  /**
   * Builds a STRING literal token whose value and span are the text between
   * [start] and [end].
   */
  Token makeIEFilter(int start, int end) {
    var filter = _text.substring(start, end);
    return new LiteralToken(TokenKind.STRING, _file.span(start, end), filter);
  }

  /**
   * Builds a STRING literal token for a raw string by slicing the value
   * directly out of the text.
   *
   * NOTE(review): the fixed offsets (4 and 3 for multi-line, 2 and 1
   * otherwise) presumably account for an `r` prefix plus the opening and
   * closing quotes - confirm against the generated tokenizer.
   */
  Token _makeRawStringToken(bool isMultiline) {
    var s;
    if (isMultiline) {
      // Skip initial newline in multiline strings
      int start = _startIndex + 4;
      if (_text[start] == '\n') start++;
      s = _text.substring(start, _index - 3);
    } else {
      s = _text.substring(_startIndex + 2, _index - 1);
    }
    return new LiteralToken(TokenKind.STRING,
        _file.span(_startIndex, _index), s);
  }

  /**
   * Consumes the body of a multi-line string up to three consecutive
   * [quote] characters, resolving backslash escapes along the way.
   *
   * Returns an error token if the text ends first or an escape is invalid.
   */
  Token finishMultilineString(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote) {
        if (_maybeEatChar(quote)) {
          if (_maybeEatChar(quote)) {
            return _makeStringToken(buf, false);
          }
          // Two quotes that are not followed by a third are content.
          buf.add(quote);
        }
        buf.add(quote);
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /** Builds an LBRACE token for the text consumed so far. */
  Token _finishOpenBrace() {
    return _finishToken(TokenKind.LBRACE);
  }

  /** Builds an RBRACE token for the text consumed so far. */
  Token _finishCloseBrace() {
    return _finishToken(TokenKind.RBRACE);
  }

  /**
   * Finishes a string after its opening [quote]: a second quote makes it
   * the empty string, a third one starts a multi-line string, and anything
   * else is handled by [finishStringBody].
   */
  Token finishString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        // skip an initial newline
        _maybeEatChar(TokenChar.NEWLINE);
        return finishMultilineString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    return finishStringBody(quote);
  }

  /**
   * Finishes a raw string after its opening [quote]. No escape processing
   * is done; the value is everything up to the next [quote].
   *
   * Returns an error token if the text ends before the closing quote.
   */
  Token finishRawString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        return finishMultilineRawString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeRawStringToken(false);
      } else if (ch == 0) {
        return _errorToken();
      }
    }
  }

  /**
   * Consumes a multi-line raw string up to three consecutive [quote]
   * characters. Returns an error token if the text ends first.
   */
  Token finishMultilineRawString(int quote) {
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
        return _makeRawStringToken(true);
      }
    }
  }

  /**
   * Consumes the body of a single-line string up to the closing [quote],
   * resolving backslash escapes along the way.
   *
   * Returns an error token if the text ends first or an escape is invalid.
   */
  Token finishStringBody(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeStringToken(buf, false);
      } else if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /**
   * Reads the escape that follows an already-consumed backslash and returns
   * the code unit it stands for, or -1 if the escape is invalid.
   *
   * `n r f b t v` map to their control characters, `xHH`, `uHHHH` and
   * `u{H...}` are hexadecimal escapes, and any other character stands for
   * itself.
   */
  int readEscapeSequence() {
    final ch = _nextChar();
    int hexValue;
    switch (ch) {
      case 110/*n*/:
        return TokenChar.NEWLINE;
      case 114/*r*/:
        return TokenChar.RETURN;
      case 102/*f*/:
        return TokenChar.FF;
      case 98/*b*/:
        return TokenChar.BACKSPACE;
      case 116/*t*/:
        return TokenChar.TAB;
      case 118/*v*/:
        // Vertical tab is U+000B; it is distinct from form feed (\f).
        return 0x0B;
      case 120/*x*/:
        hexValue = readHex(2);
        break;
      case 117/*u*/:
        if (_maybeEatChar(TokenChar.LBRACE)) {
          hexValue = readHex();
          if (!_maybeEatChar(TokenChar.RBRACE)) {
            return -1;
          }
        } else {
          hexValue = readHex(4);
        }
        break;
      default:
        return ch;
    }

    if (hexValue == -1) return -1;

    // According to the Unicode standard the high and low surrogate halves
    // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
    // are not legal Unicode values.
    if (hexValue < 0xD800 || (hexValue > 0xDFFF && hexValue <= 0xFFFF)) {
      return hexValue;
    } else if (hexValue <= 0x10FFFF) {
      // NOTE(review): surrogate values (U+D800 through U+DFFF) also reach
      // this branch and are reported with the same message.
      messages.error('unicode values greater than 2 bytes not implemented yet',
          _file.span(_startIndex, _startIndex + 1));
      return -1;
    } else {
      return -1;
    }
  }

  /**
   * Finishes a token that started with a dot: a number with a leading dot
   * when a digit follows, a plain DOT token otherwise.
   */
  Token finishDot() {
    if (TokenizerHelpers.isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    } else {
      return _finishToken(TokenKind.DOT);
    }
  }
}
438
OLDNEW
« no previous file with comments | « observatory_pub_packages/csslib/src/tokenizer.dart ('k') | observatory_pub_packages/csslib/src/tokenkind.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698