OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of csslib.parser; | 5 part of csslib.parser; |
6 | 6 |
7 class Tokenizer extends TokenizerBase { | 7 class Tokenizer extends TokenizerBase { |
8 /** U+ prefix for unicode characters. */ | 8 /** U+ prefix for unicode characters. */ |
9 final UNICODE_U = 'U'.codeUnitAt(0); | 9 final UNICODE_U = 'U'.codeUnitAt(0); |
10 final UNICODE_LOWER_U = 'u'.codeUnitAt(0); | 10 final UNICODE_LOWER_U = 'u'.codeUnitAt(0); |
(...skipping 23 matching lines...) Expand all Loading... |
34 case TokenChar.END_OF_FILE: | 34 case TokenChar.END_OF_FILE: |
35 return _finishToken(TokenKind.END_OF_FILE); | 35 return _finishToken(TokenKind.END_OF_FILE); |
36 case TokenChar.AT: | 36 case TokenChar.AT: |
37 int peekCh = _peekChar(); | 37 int peekCh = _peekChar(); |
38 if (TokenizerHelpers.isIdentifierStart(peekCh)) { | 38 if (TokenizerHelpers.isIdentifierStart(peekCh)) { |
39 var oldIndex = _index; | 39 var oldIndex = _index; |
40 var oldStartIndex = _startIndex; | 40 var oldStartIndex = _startIndex; |
41 | 41 |
42 _startIndex = _index; | 42 _startIndex = _index; |
43 ch = _nextChar(); | 43 ch = _nextChar(); |
44 Token ident = this.finishIdentifier(ch); | 44 Token ident = finishIdentifier(); |
45 | 45 |
46 // Is it a directive? | 46 // Is it a directive? |
47 int tokId = TokenKind.matchDirectives(_text, _startIndex, | 47 int tokId = TokenKind.matchDirectives(_text, _startIndex, |
48 _index - _startIndex); | 48 _index - _startIndex); |
49 if (tokId == -1) { | 49 if (tokId == -1) { |
50 // No, is it a margin directive? | 50 // No, is it a margin directive? |
51 tokId = TokenKind.matchMarginDirectives(_text, _startIndex, | 51 tokId = TokenKind.matchMarginDirectives(_text, _startIndex, |
52 _index - _startIndex); | 52 _index - _startIndex); |
53 } | 53 } |
54 | 54 |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
94 // ]]> | 94 // ]]> |
95 return next(); | 95 return next(); |
96 } | 96 } |
97 return _finishToken(TokenKind.RBRACK); | 97 return _finishToken(TokenKind.RBRACK); |
98 case TokenChar.HASH: | 98 case TokenChar.HASH: |
99 return _finishToken(TokenKind.HASH); | 99 return _finishToken(TokenKind.HASH); |
100 case TokenChar.PLUS: | 100 case TokenChar.PLUS: |
101 if (maybeEatDigit()) return finishNumber(); | 101 if (maybeEatDigit()) return finishNumber(); |
102 return _finishToken(TokenKind.PLUS); | 102 return _finishToken(TokenKind.PLUS); |
103 case TokenChar.MINUS: | 103 case TokenChar.MINUS: |
104 if (selectorExpression || unicodeRange) { | 104 if (inSelectorExpression || unicodeRange) { |
105 // If parsing in pseudo function expression then minus is an operator | 105 // If parsing in pseudo function expression then minus is an operator |
106 // not part of identifier e.g., interval value range (e.g. U+400-4ff) | 106 // not part of identifier e.g., interval value range (e.g. U+400-4ff) |
107 // or minus operator in selector expression. | 107 // or minus operator in selector expression. |
108 return _finishToken(TokenKind.MINUS); | 108 return _finishToken(TokenKind.MINUS); |
109 } else if (maybeEatDigit()) { | 109 } else if (maybeEatDigit()) { |
110 return finishNumber(); | 110 return finishNumber(); |
111 } else if (TokenizerHelpers.isIdentifierStart(ch)) { | 111 } else if (TokenizerHelpers.isIdentifierStart(ch)) { |
112 return this.finishIdentifier(ch); | 112 return finishIdentifier(); |
113 } | 113 } |
114 return _finishToken(TokenKind.MINUS); | 114 return _finishToken(TokenKind.MINUS); |
115 case TokenChar.GREATER: | 115 case TokenChar.GREATER: |
116 return _finishToken(TokenKind.GREATER); | 116 return _finishToken(TokenKind.GREATER); |
117 case TokenChar.TILDE: | 117 case TokenChar.TILDE: |
118 if (_maybeEatChar(TokenChar.EQUALS)) { | 118 if (_maybeEatChar(TokenChar.EQUALS)) { |
119 return _finishToken(TokenKind.INCLUDES); // ~= | 119 return _finishToken(TokenKind.INCLUDES); // ~= |
120 } | 120 } |
121 return _finishToken(TokenKind.TILDE); | 121 return _finishToken(TokenKind.TILDE); |
122 case TokenChar.ASTERISK: | 122 case TokenChar.ASTERISK: |
123 if (_maybeEatChar(TokenChar.EQUALS)) { | 123 if (_maybeEatChar(TokenChar.EQUALS)) { |
124 return _finishToken(TokenKind.SUBSTRING_MATCH); // *= | 124 return _finishToken(TokenKind.SUBSTRING_MATCH); // *= |
125 } | 125 } |
126 return _finishToken(TokenKind.ASTERISK); | 126 return _finishToken(TokenKind.ASTERISK); |
127 case TokenChar.AMPERSAND: | 127 case TokenChar.AMPERSAND: |
128 return _finishToken(TokenKind.AMPERSAND); | 128 return _finishToken(TokenKind.AMPERSAND); |
129 case TokenChar.NAMESPACE: | 129 case TokenChar.NAMESPACE: |
| 130 if (_maybeEatChar(TokenChar.EQUALS)) { |
| 131 return _finishToken(TokenKind.DASH_MATCH); // |= |
| 132 } |
130 return _finishToken(TokenKind.NAMESPACE); | 133 return _finishToken(TokenKind.NAMESPACE); |
131 case TokenChar.COLON: | 134 case TokenChar.COLON: |
132 return _finishToken(TokenKind.COLON); | 135 return _finishToken(TokenKind.COLON); |
133 case TokenChar.COMMA: | 136 case TokenChar.COMMA: |
134 return _finishToken(TokenKind.COMMA); | 137 return _finishToken(TokenKind.COMMA); |
135 case TokenChar.SEMICOLON: | 138 case TokenChar.SEMICOLON: |
136 return _finishToken(TokenKind.SEMICOLON); | 139 return _finishToken(TokenKind.SEMICOLON); |
137 case TokenChar.PERCENT: | 140 case TokenChar.PERCENT: |
138 return _finishToken(TokenKind.PERCENT); | 141 return _finishToken(TokenKind.PERCENT); |
139 case TokenChar.SINGLE_QUOTE: | 142 case TokenChar.SINGLE_QUOTE: |
(...skipping 15 matching lines...) Expand all Loading... |
155 _maybeEatChar(CDATA_NAME[3]) && | 158 _maybeEatChar(CDATA_NAME[3]) && |
156 _maybeEatChar(CDATA_NAME[4]) && | 159 _maybeEatChar(CDATA_NAME[4]) && |
157 _maybeEatChar(TokenChar.LBRACK)) { | 160 _maybeEatChar(TokenChar.LBRACK)) { |
158 // <![CDATA[ | 161 // <![CDATA[ |
159 return next(); | 162 return next(); |
160 } | 163 } |
161 } | 164 } |
162 return _finishToken(TokenKind.LESS); | 165 return _finishToken(TokenKind.LESS); |
163 case TokenChar.EQUALS: | 166 case TokenChar.EQUALS: |
164 return _finishToken(TokenKind.EQUALS); | 167 return _finishToken(TokenKind.EQUALS); |
165 case TokenChar.OR: | |
166 if (_maybeEatChar(TokenChar.EQUALS)) { | |
167 return _finishToken(TokenKind.DASH_MATCH); // |= | |
168 } | |
169 return _finishToken(TokenKind.OR); | |
170 case TokenChar.CARET: | 168 case TokenChar.CARET: |
171 if (_maybeEatChar(TokenChar.EQUALS)) { | 169 if (_maybeEatChar(TokenChar.EQUALS)) { |
172 return _finishToken(TokenKind.PREFIX_MATCH); // ^= | 170 return _finishToken(TokenKind.PREFIX_MATCH); // ^= |
173 } | 171 } |
174 return _finishToken(TokenKind.CARET); | 172 return _finishToken(TokenKind.CARET); |
175 case TokenChar.DOLLAR: | 173 case TokenChar.DOLLAR: |
176 if (_maybeEatChar(TokenChar.EQUALS)) { | 174 if (_maybeEatChar(TokenChar.EQUALS)) { |
177 return _finishToken(TokenKind.SUFFIX_MATCH); // $= | 175 return _finishToken(TokenKind.SUFFIX_MATCH); // $= |
178 } | 176 } |
179 return _finishToken(TokenKind.DOLLAR); | 177 return _finishToken(TokenKind.DOLLAR); |
180 case TokenChar.BANG: | 178 case TokenChar.BANG: |
181 Token tok = finishIdentifier(ch); | 179 Token tok = finishIdentifier(); |
182 return (tok == null) ? _finishToken(TokenKind.BANG) : tok; | 180 return (tok == null) ? _finishToken(TokenKind.BANG) : tok; |
183 case TokenChar.BACKSLASH: | |
184 return _finishToken(TokenKind.BACKSLASH); | |
185 default: | 181 default: |
| 182 // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's |
| 183 // appropriate outside of a few specific places; certainly shouldn't |
| 184 // be parsed in selectors. |
| 185 if (!inSelector && ch == TokenChar.BACKSLASH) { |
| 186 return _finishToken(TokenKind.BACKSLASH); |
| 187 } |
| 188 |
186 if (unicodeRange) { | 189 if (unicodeRange) { |
187 // Three types of unicode ranges: | 190 // Three types of unicode ranges: |
188 // - single code point (e.g. U+416) | 191 // - single code point (e.g. U+416) |
189 // - interval value range (e.g. U+400-4ff) | 192 // - interval value range (e.g. U+400-4ff) |
190 // - range where trailing ‘?’ characters imply ‘any digit value’ | 193 // - range where trailing ‘?’ characters imply ‘any digit value’ |
191 // (e.g. U+4??) | 194 // (e.g. U+4??) |
192 if (maybeEatHexDigit()) { | 195 if (maybeEatHexDigit()) { |
193 var t = finishHexNumber(); | 196 var t = finishHexNumber(); |
194 // Any question marks then it's a HEX_RANGE not HEX_NUMBER. | 197 // Any question marks then it's a HEX_RANGE not HEX_NUMBER. |
195 if (maybeEatQuestionMark()) finishUnicodeRange(); | 198 if (maybeEatQuestionMark()) finishUnicodeRange(); |
196 return t; | 199 return t; |
197 } else if (maybeEatQuestionMark()) { | 200 } else if (maybeEatQuestionMark()) { |
198 // HEX_RANGE U+N??? | 201 // HEX_RANGE U+N??? |
199 return finishUnicodeRange(); | 202 return finishUnicodeRange(); |
200 } else { | 203 } else { |
201 return _errorToken(); | 204 return _errorToken(); |
202 } | 205 } |
203 } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) && | 206 } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) && |
204 (_peekChar() == UNICODE_PLUS)) { | 207 (_peekChar() == UNICODE_PLUS)) { |
205 // Unicode range: U+uNumber[-U+uNumber] | 208 // Unicode range: U+uNumber[-U+uNumber] |
206 // uNumber = 0..10FFFF | 209 // uNumber = 0..10FFFF |
207 _nextChar(); // Skip + | 210 _nextChar(); // Skip + |
208 _startIndex = _index; // Starts at the number | 211 _startIndex = _index; // Starts at the number |
209 return _finishToken(TokenKind.UNICODE_RANGE); | 212 return _finishToken(TokenKind.UNICODE_RANGE); |
210 } else if (varDef(ch)) { | 213 } else if (varDef(ch)) { |
211 return _finishToken(TokenKind.VAR_DEFINITION); | 214 return _finishToken(TokenKind.VAR_DEFINITION); |
212 } else if (varUsage(ch)) { | 215 } else if (varUsage(ch)) { |
213 return _finishToken(TokenKind.VAR_USAGE); | 216 return _finishToken(TokenKind.VAR_USAGE); |
214 } else if (TokenizerHelpers.isIdentifierStart(ch)) { | 217 } else if (TokenizerHelpers.isIdentifierStart(ch)) { |
215 return finishIdentifier(ch); | 218 return finishIdentifier(); |
216 } else if (TokenizerHelpers.isDigit(ch)) { | 219 } else if (TokenizerHelpers.isDigit(ch)) { |
217 return finishNumber(); | 220 return finishNumber(); |
218 } | 221 } |
219 return _errorToken(); | 222 return _errorToken(); |
220 } | 223 } |
221 } | 224 } |
222 | 225 |
223 bool varDef(int ch) { | 226 bool varDef(int ch) { |
224 return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) && | 227 return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) && |
225 _maybeEatChar('r'.codeUnitAt(0)) && _maybeEatChar('-'.codeUnitAt(0)); | 228 _maybeEatChar('r'.codeUnitAt(0)) && _maybeEatChar('-'.codeUnitAt(0)); |
226 } | 229 } |
227 | 230 |
228 bool varUsage(int ch) { | 231 bool varUsage(int ch) { |
229 return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) && | 232 return ch == 'v'.codeUnitAt(0) && _maybeEatChar('a'.codeUnitAt(0)) && |
230 _maybeEatChar('r'.codeUnitAt(0)) && (_peekChar() == '-'.codeUnitAt(0)); | 233 _maybeEatChar('r'.codeUnitAt(0)) && (_peekChar() == '-'.codeUnitAt(0)); |
231 } | 234 } |
232 | 235 |
233 Token _errorToken([String message = null]) { | 236 Token _errorToken([String message = null]) { |
234 return _finishToken(TokenKind.ERROR); | 237 return _finishToken(TokenKind.ERROR); |
235 } | 238 } |
236 | 239 |
237 int getIdentifierKind() { | 240 int getIdentifierKind() { |
238 // Is the identifier a unit type? | 241 // Is the identifier a unit type? |
239 int tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex); | 242 int tokId = -1; |
| 243 |
| 244 // Don't match units in selectors or selector expressions. |
| 245 if (!inSelectorExpression && !inSelector) { |
| 246 tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex); |
| 247 } |
240 if (tokId == -1) { | 248 if (tokId == -1) { |
241 tokId = (_text.substring(_startIndex, _index) == '!important') ? | 249 tokId = (_text.substring(_startIndex, _index) == '!important') ? |
242 TokenKind.IMPORTANT : -1; | 250 TokenKind.IMPORTANT : -1; |
243 } | 251 } |
244 | 252 |
245 return tokId >= 0 ? tokId : TokenKind.IDENTIFIER; | 253 return tokId >= 0 ? tokId : TokenKind.IDENTIFIER; |
246 } | 254 } |
247 | 255 |
248 // Need to override so CSS version of isIdentifierPart is used. | 256 Token finishIdentifier() { |
249 Token finishIdentifier(int ch) { | 257 // If we encounter an escape sequence, remember it so we can post-process |
| 258 // to unescape. |
| 259 bool hasEscapedChars = false; |
| 260 var chars = []; |
| 261 |
| 262 // backup so we can start with the first character |
| 263 int validateFrom = _index; |
| 264 _index = _startIndex; |
250 while (_index < _text.length) { | 265 while (_index < _text.length) { |
251 // If parsing in pseudo function expression then minus is an operator | 266 int ch = _text.codeUnitAt(_index); |
252 // not part of identifier. | 267 |
253 var isIdentifier = selectorExpression | 268 // If the previous character was "\" we need to escape. T |
254 ? TokenizerHelpers.isIdentifierPartExpr(_text.codeUnitAt(_index)) | 269 // http://www.w3.org/TR/CSS21/syndata.html#characters |
255 : TokenizerHelpers.isIdentifierPart(_text.codeUnitAt(_index)); | 270 // if followed by hexadecimal digits, create the appropriate character. |
256 if (!isIdentifier) { | 271 // otherwise, include the character in the identifier and don't treat it |
257 break; | 272 // specially. |
| 273 if (ch == 92/*\*/) { |
| 274 int startHex = ++_index; |
| 275 eatHexDigits(startHex + 6); |
| 276 if (_index != startHex) { |
| 277 // Parse the hex digits and add that character. |
| 278 chars.add(int.parse('0x' + _text.substring(startHex, _index))); |
| 279 |
| 280 if (_index == _text.length) break; |
| 281 |
| 282 // if we stopped the hex because of a whitespace char, skip it |
| 283 ch = _text.codeUnitAt(_index); |
| 284 if (_index - startHex != 6 && |
| 285 (ch == TokenChar.SPACE || ch == TokenChar.TAB || |
| 286 ch == TokenChar.RETURN || ch == TokenChar.NEWLINE)) { |
| 287 _index++; |
| 288 } |
| 289 } else { |
| 290 // not a digit, just add the next character literally |
| 291 if (_index == _text.length) break; |
| 292 chars.add(_text.codeUnitAt(_index++)); |
| 293 } |
| 294 } else if (_index < validateFrom || (inSelectorExpression |
| 295 ? TokenizerHelpers.isIdentifierPartExpr(ch) |
| 296 : TokenizerHelpers.isIdentifierPart(ch))) { |
| 297 chars.add(ch); |
| 298 _index++; |
258 } else { | 299 } else { |
259 _index += 1; | 300 // Not an identifier or escaped character. |
| 301 break; |
260 } | 302 } |
261 } | 303 } |
262 | 304 |
263 int kind = getIdentifierKind(); | 305 var span = _file.span(_startIndex, _index); |
264 if (kind == TokenKind.IDENTIFIER) { | 306 var text = new String.fromCharCodes(chars); |
265 return _finishToken(TokenKind.IDENTIFIER); | |
266 } else { | |
267 return _finishToken(kind); | |
268 } | |
269 } | |
270 | 307 |
271 Token finishImportant() { | 308 return new IdentifierToken(text, getIdentifierKind(), span); |
272 | |
273 } | 309 } |
274 | 310 |
275 Token finishNumber() { | 311 Token finishNumber() { |
276 eatDigits(); | 312 eatDigits(); |
277 | 313 |
278 if (_peekChar() == 46/*.*/) { | 314 if (_peekChar() == 46/*.*/) { |
279 // Handle the case of 1.toString(). | 315 // Handle the case of 1.toString(). |
280 _nextChar(); | 316 _nextChar(); |
281 if (TokenizerHelpers.isDigit(_peekChar())) { | 317 if (TokenizerHelpers.isDigit(_peekChar())) { |
282 eatDigits(); | 318 eatDigits(); |
283 return _finishToken(TokenKind.DOUBLE); | 319 return _finishToken(TokenKind.DOUBLE); |
284 } else { | 320 } else { |
285 _index -= 1; | 321 _index -= 1; |
286 } | 322 } |
287 } | 323 } |
288 | 324 |
289 return _finishToken(TokenKind.INTEGER); | 325 return _finishToken(TokenKind.INTEGER); |
290 } | 326 } |
291 | 327 |
292 bool maybeEatDigit() { | 328 bool maybeEatDigit() { |
293 if (_index < _text.length | 329 if (_index < _text.length |
294 && TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) { | 330 && TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) { |
295 _index += 1; | 331 _index += 1; |
296 return true; | 332 return true; |
297 } | 333 } |
298 return false; | 334 return false; |
299 } | 335 } |
300 | 336 |
301 Token finishHexNumber() { | 337 Token finishHexNumber() { |
302 eatHexDigits(); | 338 eatHexDigits(_text.length); |
303 return _finishToken(TokenKind.HEX_INTEGER); | 339 return _finishToken(TokenKind.HEX_INTEGER); |
304 } | 340 } |
305 | 341 |
306 void eatHexDigits() { | 342 void eatHexDigits(int end) { |
307 while (_index < _text.length) { | 343 end = math.min(end, _text.length); |
| 344 while (_index < end) { |
308 if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) { | 345 if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) { |
309 _index += 1; | 346 _index += 1; |
310 } else { | 347 } else { |
311 return; | 348 return; |
312 } | 349 } |
313 } | 350 } |
314 } | 351 } |
315 | 352 |
316 bool maybeEatHexDigit() { | 353 bool maybeEatHexDigit() { |
317 if (_index < _text.length | 354 if (_index < _text.length |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
392 || (c >= 65/*A*/ && c <= 70/*F*/)); | 429 || (c >= 65/*A*/ && c <= 70/*F*/)); |
393 } | 430 } |
394 | 431 |
395 static bool isIdentifierPart(int c) { | 432 static bool isIdentifierPart(int c) { |
396 return isIdentifierPartExpr(c) || c == 45 /*-*/; | 433 return isIdentifierPartExpr(c) || c == 45 /*-*/; |
397 } | 434 } |
398 | 435 |
399 /** Pseudo function expressions identifiers can't have a minus sign. */ | 436 /** Pseudo function expressions identifiers can't have a minus sign. */ |
400 static bool isIdentifierStartExpr(int c) { | 437 static bool isIdentifierStartExpr(int c) { |
401 return ((c >= 97/*a*/ && c <= 122/*z*/) || (c >= 65/*A*/ && c <= 90/*Z*/) || | 438 return ((c >= 97/*a*/ && c <= 122/*z*/) || (c >= 65/*A*/ && c <= 90/*Z*/) || |
402 c == 95/*_*/); | 439 // Note: Unicode 10646 chars U+00A0 or higher are allowed, see: |
| 440 // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier |
| 441 // http://www.w3.org/TR/CSS21/syndata.html#characters |
| 442 // Also, escaped character should be allowed. |
| 443 c == 95/*_*/ || c >= 0xA0 || c == 92/*\*/); |
403 } | 444 } |
404 | 445 |
405 /** Pseudo function expressions identifiers can't have a minus sign. */ | 446 /** Pseudo function expressions identifiers can't have a minus sign. */ |
406 static bool isIdentifierPartExpr(int c) { | 447 static bool isIdentifierPartExpr(int c) { |
407 return (isIdentifierStartExpr(c) || isDigit(c)); | 448 return (isIdentifierStartExpr(c) || isDigit(c)); |
408 } | 449 } |
409 } | 450 } |
OLD | NEW |