// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// Generated by scripts/tokenizer_gen.py.

part of csslib.parser;

/** Tokenizer state to support lookahead for Less' nested selectors. */
class TokenizerState {
  final int index;
  final int startIndex;
  final bool inSelectorExpression;
  final bool inSelector;

  TokenizerState(TokenizerBase base)
      : index = base._index,
        startIndex = base._startIndex,
        inSelectorExpression = base.inSelectorExpression,
        inSelector = base.inSelector;
}

/**
 * The base class for our tokenizer. The hand-coded parts are in this file,
 * with the generated parts in the subclass Tokenizer.
 */
abstract class TokenizerBase {
  final SourceFile _file;
  final String _text;

  bool _inString;

  /**
   * Changes tokenization when in a pseudo function expression. If true then
   * minus signs are handled as operators instead of identifiers.
   */
  bool inSelectorExpression = false;

  /**
   * Changes tokenization when in selectors. If true, it prevents identifiers
   * from being treated as units, which would otherwise break things like
   * ":lang(fr)" or the HTML (unknown) tag name "px", which is legal to use
   * in a selector.
   */
  // TODO(jmesserly): is this a problem elsewhere? "fr" for example will be
  // processed as a "fraction" unit token, preventing it from working in
  // places where an identifier is expected. This was breaking selectors like:
  //     :lang(fr)
  // The assumption that "fr" always means fraction (and similar issues with
  // other units) doesn't seem valid. We probably should defer this
  // analysis until we reach places in the parser where units are expected.
  // I'm not sure this is tokenizing as described in the specs:
  //     http://dev.w3.org/csswg/css-syntax/
  //     http://dev.w3.org/csswg/selectors4/
  bool inSelector = false;
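  // For example (illustrative sketch; the parser, not this class, is assumed
  // to toggle this flag around selector parsing):
  //
  //     div:lang(fr) { grid-template-columns: 1fr; }
  //
  // With inSelector == true, "fr" inside :lang(...) stays an identifier; with
  // it false, the "1fr" in the declaration can be scanned as a number
  // followed by a fraction unit.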

  int _index = 0;
  int _startIndex = 0;

  TokenizerBase(this._file, this._text, this._inString, [this._index = 0]);

  Token next();
  int getIdentifierKind();

  /** Snapshot of Tokenizer scanning state. */
  TokenizerState get mark => new TokenizerState(this);

  /** Restore Tokenizer scanning state. */
  void restore(TokenizerState markedData) {
    _index = markedData.index;
    _startIndex = markedData.startIndex;
    inSelectorExpression = markedData.inSelectorExpression;
    inSelector = markedData.inSelector;
  }
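
  // Example use of [mark]/[restore] above (illustrative only; the token-kind
  // test is just a stand-in for whatever a caller looks ahead for):
  //
  //     var state = tokenizer.mark;
  //     var tok = tokenizer.next();
  //     if (tok.kind != TokenKind.LBRACE) {
  //       tokenizer.restore(state); // rewind; the probe consumed nothing
  //     }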

  /** Consumes and returns the next code unit, or 0 at the end of input. */
  int _nextChar() {
    if (_index < _text.length) {
      return _text.codeUnitAt(_index++);
    } else {
      return 0;
    }
  }

  /** Peeks at the next code unit without advancing; 0 at the end of input. */
  int _peekChar() {
    if (_index < _text.length) {
      return _text.codeUnitAt(_index);
    } else {
      return 0;
    }
  }

  /** Consumes the next code unit and returns true only if it equals [ch]. */
  bool _maybeEatChar(int ch) {
    if (_index < _text.length) {
      if (_text.codeUnitAt(_index) == ch) {
        _index++;
        return true;
      } else {
        return false;
      }
    } else {
      return false;
    }
  }

  Token _finishToken(int kind) {
    return new Token(kind, _file.span(_startIndex, _index));
  }

  Token _errorToken([String message = null]) {
    return new ErrorToken(
        TokenKind.ERROR, _file.span(_startIndex, _index), message);
  }

  Token finishWhitespace() {
    // Back up over the whitespace character the caller already consumed.
    _index--;
    while (_index < _text.length) {
      final ch = _text.codeUnitAt(_index++);
      if (ch == TokenChar.SPACE ||
          ch == TokenChar.TAB ||
          ch == TokenChar.RETURN) {
        // do nothing
      } else if (ch == TokenChar.NEWLINE) {
        if (!_inString) {
          return _finishToken(TokenKind.WHITESPACE); // note the newline?
        }
      } else {
        _index--;
        if (_inString) {
          return next();
        } else {
          return _finishToken(TokenKind.WHITESPACE);
        }
      }
    }
    return _finishToken(TokenKind.END_OF_FILE);
  }

  Token finishMultiLineComment() {
    int nesting = 1;
    do {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.ASTERISK) {
        if (_maybeEatChar(TokenChar.SLASH)) {
          nesting--;
        }
      } else if (ch == TokenChar.SLASH) {
        if (_maybeEatChar(TokenChar.ASTERISK)) {
          nesting++;
        }
      }
    } while (nesting > 0);

    if (_inString) {
      return next();
    } else {
      return _finishToken(TokenKind.COMMENT);
    }
  }
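
  // Note on finishMultiLineComment above: the nesting counter means a
  // comment such as
  //
  //     /* outer /* inner */ still outer */
  //
  // is consumed as a single COMMENT token (CSS comments don't nest per the
  // spec, but this scanner tracks nesting anyway): each "/*" increments and
  // each "*/" decrements the counter until it reaches zero.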

  void eatDigits() {
    while (_index < _text.length) {
      if (TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
        _index++;
      } else {
        return;
      }
    }
  }

  static int _hexDigit(int c) {
    if (c >= 48 /*0*/ && c <= 57 /*9*/) {
      return c - 48;
    } else if (c >= 97 /*a*/ && c <= 102 /*f*/) {
      return c - 87;
    } else if (c >= 65 /*A*/ && c <= 70 /*F*/) {
      return c - 55;
    } else {
      return -1;
    }
  }

  int readHex([int hexLength]) {
    int maxIndex;
    if (hexLength == null) {
      maxIndex = _text.length - 1;
    } else {
      // TODO(jimhug): What if this is too long?
      maxIndex = _index + hexLength;
      if (maxIndex >= _text.length) return -1;
    }
    var result = 0;
    while (_index < maxIndex) {
      final digit = _hexDigit(_text.codeUnitAt(_index));
      if (digit == -1) {
        if (hexLength == null) {
          return result;
        } else {
          return -1;
        }
      }
      // Multiply by 16 rather than shift by 4 since that will result in a
      // correct value for numbers that exceed the 32 bit precision of JS
      // 'integers'.
      // TODO: Figure out a better solution to integer truncation. Issue 638.
      result = (result * 16) + digit;
      _index++;
    }

    return result;
  }
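
  // Worked example for readHex above: scanning the two digits "4A"
  // accumulates
  //
  //     result = 0 * 16 + 4;   // 4
  //     result = 4 * 16 + 10;  // 74 == 0x4A
  //
  // so readHex(2) positioned at "4A" returns 74.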

  Token finishNumber() {
    eatDigits();

    if (_peekChar() == TokenChar.DOT) {
      // Only consume the '.' if it starts a fractional part; otherwise back
      // up so the dot is tokenized on its own.
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return finishNumberExtra(TokenKind.DOUBLE);
      } else {
        _index--;
      }
    }

    return finishNumberExtra(TokenKind.INTEGER);
  }

  Token finishNumberExtra(int kind) {
    if (_maybeEatChar(101 /*e*/) || _maybeEatChar(69 /*E*/)) {
      kind = TokenKind.DOUBLE;
      // Optional sign on the exponent.
      _maybeEatChar(45 /*-*/);
      _maybeEatChar(43 /*+*/);
      eatDigits();
    }
    if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
      _nextChar();
      return _errorToken("illegal character in number");
    }

    return _finishToken(kind);
  }
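
  // Sketch of what finishNumber/finishNumberExtra produce as written in this
  // base class (unit and dimension handling is assumed to live in the
  // generated subclass and the parser, which may refine this):
  //
  //     "42"   -> INTEGER token
  //     "4.2"  -> DOUBLE token (finishNumber consumes the fraction first)
  //     "4e-2" -> DOUBLE token (exponent sign and digits consumed above)
  //     "4q"   -> error token "illegal character in number"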

  Token _makeStringToken(List<int> buf, bool isPart) {
    final s = new String.fromCharCodes(buf);
    final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
    return new LiteralToken(kind, _file.span(_startIndex, _index), s);
  }

  Token makeIEFilter(int start, int end) {
    var filter = _text.substring(start, end);
    return new LiteralToken(TokenKind.STRING, _file.span(start, end), filter);
  }

  Token _makeRawStringToken(bool isMultiline) {
    var s;
    if (isMultiline) {
      // Skip initial newline in multiline strings
      int start = _startIndex + 4;
      if (_text[start] == '\n') start++;
      s = _text.substring(start, _index - 3);
    } else {
      s = _text.substring(_startIndex + 2, _index - 1);
    }
    return new LiteralToken(
        TokenKind.STRING, _file.span(_startIndex, _index), s);
  }

  Token finishMultilineString(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote) {
        if (_maybeEatChar(quote)) {
          if (_maybeEatChar(quote)) {
            return _makeStringToken(buf, false);
          }
          buf.add(quote);
        }
        buf.add(quote);
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  Token finishString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        // skip an initial newline
        _maybeEatChar(TokenChar.NEWLINE);
        return finishMultilineString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    return finishStringBody(quote);
  }

  Token finishRawString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        return finishMultilineRawString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeRawStringToken(false);
      } else if (ch == 0) {
        return _errorToken();
      }
    }
  }

  Token finishMultilineRawString(int quote) {
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
        return _makeRawStringToken(true);
      }
    }
  }

  Token finishStringBody(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeStringToken(buf, false);
      } else if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }
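
  // Example for finishStringBody above: after the opening quote of 'a\x21b'
  // has been consumed, the loop collects the char codes [97, 33, 98] ("a!b"),
  // with \x21 decoded by readEscapeSequence, and the closing quote ends the
  // STRING token.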

  int readEscapeSequence() {
    final ch = _nextChar();
    int hexValue;
    switch (ch) {
      case 110 /*n*/ :
        return TokenChar.NEWLINE;
      case 114 /*r*/ :
        return TokenChar.RETURN;
      case 102 /*f*/ :
        return TokenChar.FF;
      case 98 /*b*/ :
        return TokenChar.BACKSPACE;
      case 116 /*t*/ :
        return TokenChar.TAB;
      case 118 /*v*/ :
        return 11 /* vertical tab */;
      case 120 /*x*/ :
        hexValue = readHex(2);
        break;
      case 117 /*u*/ :
        if (_maybeEatChar(TokenChar.LBRACE)) {
          hexValue = readHex();
          if (!_maybeEatChar(TokenChar.RBRACE)) {
            return -1;
          }
        } else {
          hexValue = readHex(4);
        }
        break;
      default:
        return ch;
    }

    if (hexValue == -1) return -1;

    // According to the Unicode standard the high and low surrogate halves
    // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
    // are not legal Unicode values.
    if (hexValue < 0xD800 || (hexValue > 0xDFFF && hexValue <= 0xFFFF)) {
      return hexValue;
    } else if (hexValue <= 0x10FFFF) {
      messages.error('unicode values greater than 2 bytes not implemented yet',
          _file.span(_startIndex, _startIndex + 1));
      return -1;
    } else {
      return -1;
    }
  }
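
  // Examples for readEscapeSequence above (return values are char codes):
  //
  //     \n        -> TokenChar.NEWLINE
  //     \x41      -> 65 ('A'), via readHex(2)
  //     \u0041    -> 65, via readHex(4)
  //     \u{1F600} -> -1; supplementary-plane values (and surrogate halves)
  //                  are rejected, with an error reported via messages.error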

  Token finishDot() {
    if (TokenizerHelpers.isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    } else {
      return _finishToken(TokenKind.DOT);
    }
  }
}