Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(349)

Side by Side Diff: csslib/lib/src/tokenizer_base.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « csslib/lib/src/tokenizer.dart ('k') | csslib/lib/src/tokenkind.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4 // Generated by scripts/tokenizer_gen.py.
5
6 part of csslib.parser;
7
/**
 * Snapshot of a [TokenizerBase]'s scanning state, used to support look ahead
 * for Less' nested selectors.
 *
 * All fields are copied once, at construction time, from the tokenizer that
 * is passed in; the snapshot never changes afterwards.
 */
class TokenizerState {
  /** Copy of the tokenizer's `inSelector` flag. */
  final bool inSelector;

  /** Copy of the tokenizer's `inSelectorExpression` flag. */
  final bool inSelectorExpression;

  /** Copy of the tokenizer's `_startIndex`. */
  final int startIndex;

  /** Copy of the tokenizer's `_index`. */
  final int index;

  /** Records the current state of [base]. */
  TokenizerState(TokenizerBase base)
      : index = base._index,
        startIndex = base._startIndex,
        inSelectorExpression = base.inSelectorExpression,
        inSelector = base.inSelector;
}
21
22 /**
23 * The base class for our tokenizer. The hand coded parts are in this file, with
24 * the generated parts in the subclass Tokenizer.
25 */
26 abstract class TokenizerBase {
27 final SourceFile _file;
28 final String _text;
29
30 bool _inString;
31
32 /**
33 * Changes tokenization when in a pseudo function expression. If true then
34 * minus signs are handled as operators instead of identifiers.
35 */
36 bool inSelectorExpression = false;
37
38 /**
39 * Changes tokenization when in selectors. If true, it prevents identifiers
40 * from being treated as units. This would break things like ":lang(fr)" or
41 * the HTML (unknown) tag name "px", which is legal to use in a selector.
42 */
43 // TODO(jmesserly): is this a problem elsewhere? "fr" for example will be
44 // processed as a "fraction" unit token, preventing it from working in
45 // places where an identifier is expected. This was breaking selectors like:
46 // :lang(fr)
47 // The assumption that "fr" always means fraction (and similar issue with
48 // other units) doesn't seem valid. We probably should defer this
49 // analysis until we reach places in the parser where units are expected.
50 // I'm not sure this is tokenizing as described in the specs:
51 // http://dev.w3.org/csswg/css-syntax/
52 // http://dev.w3.org/csswg/selectors4/
53 bool inSelector = false;
54
55 int _index = 0;
56 int _startIndex = 0;
57
/**
 * Stores the source file, the text to scan and the in-string flag, and
 * optionally sets the index at which scanning starts (0 when omitted).
 */
TokenizerBase(SourceFile file, String text, bool inString, [int index = 0])
    : _file = file,
      _text = text,
      _inString = inString,
      _index = index;
60
61 Token next();
62 int getIdentifierKind();
63
/**
 * Snapshot of Tokenizer scanning state. A fresh [TokenizerState] is built on
 * every access; hand it to [restore] to rewind.
 */
TokenizerState get mark {
  return new TokenizerState(this);
}
66
/**
 * Restore Tokenizer scanning state: copies both selector flags and both
 * indices from [markedData] back into this tokenizer.
 */
void restore(TokenizerState markedData) {
  inSelector = markedData.inSelector;
  inSelectorExpression = markedData.inSelectorExpression;
  _startIndex = markedData.startIndex;
  _index = markedData.index;
}
74
/**
 * Consumes and returns the code unit at the current index. At the end of the
 * text it returns 0 and leaves the index where it is.
 */
int _nextChar() {
  if (_index >= _text.length) return 0;
  final unit = _text.codeUnitAt(_index);
  _index++;
  return unit;
}
82
/**
 * Returns the code unit at the current index without consuming it, or 0 at
 * the end of the text.
 */
int _peekChar() {
  return _index < _text.length ? _text.codeUnitAt(_index) : 0;
}
90
/**
 * Consumes the next code unit only if it equals [ch].
 *
 * Returns true when a code unit was consumed; returns false (and consumes
 * nothing) on a mismatch or at the end of the text.
 */
bool _maybeEatChar(int ch) {
  final matched = _index < _text.length && _text.codeUnitAt(_index) == ch;
  if (matched) {
    _index++;
  }
  return matched;
}
103
/**
 * Builds a token of the given [kind] spanning the text from the token start
 * index up to the current index.
 */
Token _finishToken(int kind) {
  final span = _file.span(_startIndex, _index);
  return new Token(kind, span);
}
107
/**
 * Builds an [ErrorToken] of kind `TokenKind.ERROR` spanning the text from the
 * token start index up to the current index, carrying the optional [message].
 */
Token _errorToken([String message = null]) {
  final span = _file.span(_startIndex, _index);
  return new ErrorToken(TokenKind.ERROR, span, message);
}
112
/**
 * Scans a run of whitespace.
 *
 * Outside a string the run ends right after the first newline, or just
 * before the first non-whitespace character, and a WHITESPACE token is
 * returned. Inside a string (`_inString`) newlines are skipped as well and,
 * once a non-whitespace character is found, scanning continues with [next].
 * If the text ends first, an END_OF_FILE token is returned.
 */
Token finishWhitespace() {
  // Step back one position so the loop re-reads the preceding character
  // (presumably the whitespace character the caller already consumed).
  _index--;
  while (_index < _text.length) {
    final unit = _text.codeUnitAt(_index);
    _index++;

    final isBlank = unit == TokenChar.SPACE ||
        unit == TokenChar.TAB ||
        unit == TokenChar.RETURN;
    if (isBlank) continue;

    if (unit == TokenChar.NEWLINE) {
      if (_inString) continue;
      return _finishToken(TokenKind.WHITESPACE); // note the newline?
    }

    // Not whitespace: un-consume it so it starts the next token.
    _index--;
    return _inString ? next() : _finishToken(TokenKind.WHITESPACE);
  }
  return _finishToken(TokenKind.END_OF_FILE);
}
136
/**
 * Scans to the end of a multi-line comment, starting at nesting depth 1.
 *
 * Each "/" followed by "*" increases the depth and each "*" followed by "/"
 * decreases it; scanning stops when the depth reaches zero. Hitting the end
 * of the text first yields an error token. On success a COMMENT token is
 * returned, unless `_inString` is set, in which case scanning continues with
 * [next].
 */
Token finishMultiLineComment() {
  var depth = 1;
  while (depth > 0) {
    final unit = _nextChar();
    if (unit == 0) {
      // Ran out of input before the comment was closed.
      return _errorToken();
    }
    if (unit == TokenChar.ASTERISK) {
      if (_maybeEatChar(TokenChar.SLASH)) depth--;
    } else if (unit == TokenChar.SLASH) {
      if (_maybeEatChar(TokenChar.ASTERISK)) depth++;
    }
  }

  return _inString ? next() : _finishToken(TokenKind.COMMENT);
}
160
/**
 * Advances the current index past every consecutive character for which
 * `TokenizerHelpers.isDigit` is true, stopping at the end of the text.
 */
void eatDigits() {
  while (_index < _text.length &&
      TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
    _index++;
  }
}
170
/**
 * Returns the numeric value (0-15) of the hexadecimal digit whose code unit
 * is [c], accepting 0-9, a-f and A-F, or -1 if [c] is not a hex digit.
 */
static int _hexDigit(int c) {
  if (48 /*0*/ <= c && c <= 57 /*9*/) return c - 48;
  if (97 /*a*/ <= c && c <= 102 /*f*/) return c - 97 + 10;
  if (65 /*A*/ <= c && c <= 70 /*F*/) return c - 65 + 10;
  return -1;
}
182
/**
 * Reads hexadecimal digits starting at the current index and returns their
 * value, advancing the index past every digit consumed.
 *
 * With [hexLength] given, exactly that many digits are required: -1 is
 * returned if fewer than [hexLength] characters remain (nothing is consumed)
 * or if a non-hex character is met (digits before it stay consumed).
 *
 * Without [hexLength], digits are read until the first non-hex character or
 * the end of the text, and the value accumulated so far is returned (0 when
 * there were no digits at all).
 */
int readHex([int hexLength]) {
  int maxIndex;
  if (hexLength == null) {
    // Unbounded: every remaining character is a candidate, including the
    // last one.
    maxIndex = _text.length;
  } else {
    // TODO(jimhug): What if this is too long?
    maxIndex = _index + hexLength;
    // A run that ends exactly at the end of the text is still readable; only
    // a run that would extend past the end is rejected.
    if (maxIndex > _text.length) return -1;
  }
  var result = 0;
  while (_index < maxIndex) {
    final digit = _hexDigit(_text.codeUnitAt(_index));
    if (digit == -1) {
      // In unbounded mode a non-hex character simply ends the number; in
      // fixed-length mode it makes the sequence invalid.
      return hexLength == null ? result : -1;
    }
    // Multiply by 16 rather than shift by 4 since that will result in a
    // correct value for numbers that exceed the 32 bit precision of JS
    // 'integers'.
    // TODO: Figure out a better solution to integer truncation. Issue 638.
    result = (result * 16) + digit;
    _index++;
  }

  return result;
}
213
/**
 * Finishes scanning a number whose leading digits have started.
 *
 * Consumes the remaining integer digits. A "." is treated as part of the
 * number only when a digit follows it (yielding a DOUBLE); otherwise the dot
 * is left unconsumed and the number is an INTEGER. The result is produced by
 * [finishNumberExtra].
 */
Token finishNumber() {
  eatDigits();

  if (_peekChar() != TokenChar.DOT) {
    return finishNumberExtra(TokenKind.INTEGER);
  }

  // Tentatively consume the dot. Handle the case of 1.toString().
  _nextChar();
  if (TokenizerHelpers.isDigit(_peekChar())) {
    eatDigits();
    return finishNumberExtra(TokenKind.DOUBLE);
  }

  // No fractional digits: give the dot back.
  _index--;
  return finishNumberExtra(TokenKind.INTEGER);
}
230
/**
 * Scans an optional exponent after a number and builds the number token.
 *
 * If the next character is "e" or "E" it is consumed together with an
 * optional single sign and any following digits, and the token kind becomes
 * DOUBLE regardless of [kind]. If the character after the number is an
 * identifier start character, it is consumed and an error token is returned
 * instead. Otherwise a token of the resulting kind is returned.
 */
Token finishNumberExtra(int kind) {
  if (_maybeEatChar(101 /*e*/) || _maybeEatChar(69 /*E*/)) {
    kind = TokenKind.DOUBLE;
    // _maybeEatChar compares code units, so the sign must be given as a
    // character code (as for 'e'/'E' above), not as a TokenKind id. At most
    // one sign is accepted.
    if (!_maybeEatChar(45 /*-*/)) {
      _maybeEatChar(43 /*+*/);
    }
    eatDigits();
  }
  if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
    _nextChar();
    return _errorToken("illegal character in number");
  }

  return _finishToken(kind);
}
245
/**
 * Builds a [LiteralToken] whose value is the string made from the char codes
 * in [buf], spanning the token start index up to the current index. The kind
 * is STRING_PART when [isPart] is true and STRING otherwise.
 */
Token _makeStringToken(List<int> buf, bool isPart) {
  final value = new String.fromCharCodes(buf);
  int kind;
  if (isPart) {
    kind = TokenKind.STRING_PART;
  } else {
    kind = TokenKind.STRING;
  }
  return new LiteralToken(kind, _file.span(_startIndex, _index), value);
}
251
/**
 * Builds a STRING [LiteralToken] whose value is the text between [start]
 * (inclusive) and [end] (exclusive), with a span covering the same range.
 */
Token makeIEFilter(int start, int end) {
  final String filterText = _text.substring(start, end);
  final span = _file.span(start, end);
  return new LiteralToken(TokenKind.STRING, span, filterText);
}
256
/**
 * Builds a STRING [LiteralToken] for a raw string by slicing its content
 * directly out of the source text (no escape processing).
 *
 * For a multi-line string the content starts 4 characters after the token
 * start (one more if that character is a newline) and stops 3 characters
 * before the current index; otherwise it starts 2 characters after the token
 * start and stops 1 character before the current index. Presumably these
 * offsets skip a one-character prefix plus the opening and closing quotes.
 */
Token _makeRawStringToken(bool isMultiline) {
  int contentStart;
  int contentEnd;
  if (isMultiline) {
    contentStart = _startIndex + 4;
    // Skip initial newline in multiline strings
    if (_text[contentStart] == '\n') contentStart++;
    contentEnd = _index - 3;
  } else {
    contentStart = _startIndex + 2;
    contentEnd = _index - 1;
  }
  final content = _text.substring(contentStart, contentEnd);
  return new LiteralToken(
      TokenKind.STRING, _file.span(_startIndex, _index), content);
}
270
/**
 * Scans the body of a multi-line string delimited by three [quote]
 * characters and returns its STRING token.
 *
 * One or two consecutive quotes inside the body are kept as literal
 * characters; three in a row end the string. Backslash escapes are decoded
 * with [readEscapeSequence]. An error token is returned if the text ends
 * before the closing quotes or if an escape sequence is invalid.
 */
Token finishMultilineString(int quote) {
  final chars = <int>[];
  while (true) {
    final unit = _nextChar();
    if (unit == 0) {
      return _errorToken();
    }

    if (unit == quote) {
      if (_maybeEatChar(quote)) {
        if (_maybeEatChar(quote)) {
          return _makeStringToken(chars, false);
        }
        // Exactly two quotes: the second one is literal too.
        chars.add(quote);
      }
      chars.add(quote);
      continue;
    }

    if (unit == TokenChar.BACKSLASH) {
      final escaped = readEscapeSequence();
      if (escaped == -1) {
        return _errorToken("invalid hex escape sequence");
      }
      chars.add(escaped);
      continue;
    }

    chars.add(unit);
  }
}
297
/**
 * Scans a string literal that was opened with [quote].
 *
 * If the next character is not another [quote], the body is scanned by
 * [finishStringBody]. Two quotes in a row (opening plus one more) form an
 * empty string. Three in a row open a multi-line string, which is scanned
 * by [finishMultilineString] after skipping one initial newline, if present.
 */
Token finishString(int quote) {
  if (!_maybeEatChar(quote)) {
    return finishStringBody(quote);
  }
  if (!_maybeEatChar(quote)) {
    // Opening quote immediately followed by a closing quote.
    return _makeStringToken(<int>[], false);
  }
  // skip an initial newline
  _maybeEatChar(TokenChar.NEWLINE);
  return finishMultilineString(quote);
}
310
/**
 * Scans a raw string literal that was opened with [quote].
 *
 * Two quotes in a row form an empty string; three open a multi-line raw
 * string handled by [finishMultilineRawString]. Otherwise characters are
 * consumed unchanged up to the next [quote]. An error token is returned if
 * the text ends before the closing quote.
 */
Token finishRawString(int quote) {
  if (_maybeEatChar(quote)) {
    return _maybeEatChar(quote)
        ? finishMultilineRawString(quote)
        : _makeStringToken(<int>[], false);
  }
  for (;;) {
    final unit = _nextChar();
    if (unit == quote) {
      return _makeRawStringToken(false);
    }
    if (unit == 0) {
      return _errorToken();
    }
  }
}
328
/**
 * Scans the body of a multi-line raw string until a [quote] is immediately
 * followed by two more, then returns the raw STRING token. An error token is
 * returned if the text ends first.
 */
Token finishMultilineRawString(int quote) {
  for (;;) {
    final unit = _nextChar();
    if (unit == 0) {
      return _errorToken();
    }
    if (unit != quote) continue;
    // Each _maybeEatChar consumes a quote only when it matches, so a lone
    // or doubled quote is simply swallowed as content.
    if (_maybeEatChar(quote) && _maybeEatChar(quote)) {
      return _makeRawStringToken(true);
    }
  }
}
339
/**
 * Scans the body of a single-line string up to the closing [quote] and
 * returns its STRING token.
 *
 * Backslash escapes are decoded with [readEscapeSequence]; every other
 * character is copied as is. An error token is returned if the text ends
 * before the closing quote or if an escape sequence is invalid.
 */
Token finishStringBody(int quote) {
  final chars = <int>[];
  for (;;) {
    final unit = _nextChar();
    if (unit == quote) {
      return _makeStringToken(chars, false);
    }
    if (unit == 0) {
      return _errorToken();
    }
    if (unit != TokenChar.BACKSLASH) {
      chars.add(unit);
      continue;
    }
    final escaped = readEscapeSequence();
    if (escaped == -1) {
      return _errorToken("invalid hex escape sequence");
    }
    chars.add(escaped);
  }
}
360
/**
 * Decodes the escape sequence that follows a backslash (the callers in this
 * class consume the backslash before calling).
 *
 * Recognised forms:
 *  * `n`, `r`, `f`, `b`, `t`, `v` - the corresponding control character;
 *  * `x` followed by 2 hex digits;
 *  * `u` followed by 4 hex digits, or by `{`, hex digits and `}`;
 *  * any other character stands for itself.
 *
 * Returns the decoded char code, or -1 when a hex sequence is malformed or
 * its value is not an accepted one (a surrogate half, or anything above
 * U+FFFF).
 */
int readEscapeSequence() {
  final ch = _nextChar();
  int hexValue;
  switch (ch) {
    case 110 /*n*/ :
      return TokenChar.NEWLINE;
    case 114 /*r*/ :
      return TokenChar.RETURN;
    case 102 /*f*/ :
      return TokenChar.FF;
    case 98 /*b*/ :
      return TokenChar.BACKSPACE;
    case 116 /*t*/ :
      return TokenChar.TAB;
    case 118 /*v*/ :
      // Vertical tab is U+000B; it must not be mapped to form feed.
      return 0x0B;
    case 120 /*x*/ :
      hexValue = readHex(2);
      break;
    case 117 /*u*/ :
      if (_maybeEatChar(TokenChar.LBRACE)) {
        hexValue = readHex();
        if (!_maybeEatChar(TokenChar.RBRACE)) {
          return -1;
        }
      } else {
        hexValue = readHex(4);
      }
      break;
    default:
      return ch;
  }

  if (hexValue == -1) return -1;

  // According to the Unicode standard the high and low surrogate halves
  // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
  // are not legal Unicode values.
  if (hexValue < 0xD800 || (hexValue > 0xDFFF && hexValue <= 0xFFFF)) {
    return hexValue;
  }
  if (hexValue <= 0x10FFFF) {
    // NOTE(review): this branch is also reached for surrogate halves
    // (U+D800-U+DFFF), for which the message below is not accurate.
    messages.error('unicode values greater than 2 bytes not implemented yet',
        _file.span(_startIndex, _startIndex + 1));
  }
  return -1;
}
409
/**
 * Finishes a token that started with ".": if a digit follows, the digits and
 * any exponent are scanned as a DOUBLE; otherwise a DOT token is returned.
 */
Token finishDot() {
  if (!TokenizerHelpers.isDigit(_peekChar())) {
    return _finishToken(TokenKind.DOT);
  }
  eatDigits();
  return finishNumberExtra(TokenKind.DOUBLE);
}
418 }
OLDNEW
« no previous file with comments | « csslib/lib/src/tokenizer.dart ('k') | csslib/lib/src/tokenkind.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698