OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org) | |
3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com) | |
4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc.
All rights reserved. | |
5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com> | |
6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org> | |
7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmo
bile.com/) | |
8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. | |
9 * Copyright (C) 2012 Intel Corporation. All rights reserved. | |
10 * | |
11 * This library is free software; you can redistribute it and/or | |
12 * modify it under the terms of the GNU Library General Public | |
13 * License as published by the Free Software Foundation; either | |
14 * version 2 of the License, or (at your option) any later version. | |
15 * | |
16 * This library is distributed in the hope that it will be useful, | |
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 * Library General Public License for more details. | |
20 * | |
21 * You should have received a copy of the GNU Library General Public License | |
22 * along with this library; see the file COPYING.LIB. If not, write to | |
23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
24 * Boston, MA 02110-1301, USA. | |
25 */ | |
26 | |
27 #include "config.h" | |
28 #include "core/css/CSSTokenizer.h" | |
29 | |
30 #include "core/css/CSSKeyframeRule.h" | |
31 #include "core/css/parser/BisonCSSParser.h" | |
32 #include "core/css/CSSParserValues.h" | |
33 #include "core/css/MediaQuery.h" | |
34 #include "core/css/StyleRule.h" | |
35 #include "core/html/parser/HTMLParserIdioms.h" | |
36 #include "core/svg/SVGParserUtilities.h" | |
37 | |
38 namespace blink { | |
39 | |
40 #include "core/CSSGrammar.h" | |
41 | |
// Category assigned to a character; drives the tokenizer's main dispatch.
enum CharacterType {
    // Identifier characters. These four must stay first and contiguous:
    // isCSSLetter() accepts every value up to and including CharacterDash.
    CharacterCaselessU,
    CharacterIdentifierStart,
    CharacterNumber,
    CharacterDash,

    // Every remaining category; none of these is an identifier character.
    CharacterOther,
    CharacterNull,
    CharacterWhiteSpace,
    CharacterEndMediaQueryOrSupports,
    CharacterEndNthChild,
    CharacterQuote,
    CharacterExclamationMark,
    CharacterHashmark,
    CharacterDollar,
    CharacterAsterisk,
    CharacterPlus,
    CharacterDot,
    CharacterSlash,
    CharacterLess,
    CharacterAt,
    CharacterBackSlash,
    CharacterXor,
    CharacterVerticalBar,
    CharacterTilde,
};
72 | |
73 // 128 ASCII codes | |
74 static const CharacterType typesOfASCIICharacters[128] = { | |
75 /* 0 - Null */ CharacterNull, | |
76 /* 1 - Start of Heading */ CharacterOther, | |
77 /* 2 - Start of Text */ CharacterOther, | |
78 /* 3 - End of Text */ CharacterOther, | |
79 /* 4 - End of Transm. */ CharacterOther, | |
80 /* 5 - Enquiry */ CharacterOther, | |
81 /* 6 - Acknowledgment */ CharacterOther, | |
82 /* 7 - Bell */ CharacterOther, | |
83 /* 8 - Back Space */ CharacterOther, | |
84 /* 9 - Horizontal Tab */ CharacterWhiteSpace, | |
85 /* 10 - Line Feed */ CharacterWhiteSpace, | |
86 /* 11 - Vertical Tab */ CharacterOther, | |
87 /* 12 - Form Feed */ CharacterWhiteSpace, | |
88 /* 13 - Carriage Return */ CharacterWhiteSpace, | |
89 /* 14 - Shift Out */ CharacterOther, | |
90 /* 15 - Shift In */ CharacterOther, | |
91 /* 16 - Data Line Escape */ CharacterOther, | |
92 /* 17 - Device Control 1 */ CharacterOther, | |
93 /* 18 - Device Control 2 */ CharacterOther, | |
94 /* 19 - Device Control 3 */ CharacterOther, | |
95 /* 20 - Device Control 4 */ CharacterOther, | |
96 /* 21 - Negative Ack. */ CharacterOther, | |
97 /* 22 - Synchronous Idle */ CharacterOther, | |
98 /* 23 - End of Transmit */ CharacterOther, | |
99 /* 24 - Cancel */ CharacterOther, | |
100 /* 25 - End of Medium */ CharacterOther, | |
101 /* 26 - Substitute */ CharacterOther, | |
102 /* 27 - Escape */ CharacterOther, | |
103 /* 28 - File Separator */ CharacterOther, | |
104 /* 29 - Group Separator */ CharacterOther, | |
105 /* 30 - Record Separator */ CharacterOther, | |
106 /* 31 - Unit Separator */ CharacterOther, | |
107 /* 32 - Space */ CharacterWhiteSpace, | |
108 /* 33 - ! */ CharacterExclamationMark, | |
109 /* 34 - " */ CharacterQuote, | |
110 /* 35 - # */ CharacterHashmark, | |
111 /* 36 - $ */ CharacterDollar, | |
112 /* 37 - % */ CharacterOther, | |
113 /* 38 - & */ CharacterOther, | |
114 /* 39 - ' */ CharacterQuote, | |
115 /* 40 - ( */ CharacterOther, | |
116 /* 41 - ) */ CharacterEndNthChild, | |
117 /* 42 - * */ CharacterAsterisk, | |
118 /* 43 - + */ CharacterPlus, | |
119 /* 44 - , */ CharacterOther, | |
120 /* 45 - - */ CharacterDash, | |
121 /* 46 - . */ CharacterDot, | |
122 /* 47 - / */ CharacterSlash, | |
123 /* 48 - 0 */ CharacterNumber, | |
124 /* 49 - 1 */ CharacterNumber, | |
125 /* 50 - 2 */ CharacterNumber, | |
126 /* 51 - 3 */ CharacterNumber, | |
127 /* 52 - 4 */ CharacterNumber, | |
128 /* 53 - 5 */ CharacterNumber, | |
129 /* 54 - 6 */ CharacterNumber, | |
130 /* 55 - 7 */ CharacterNumber, | |
131 /* 56 - 8 */ CharacterNumber, | |
132 /* 57 - 9 */ CharacterNumber, | |
133 /* 58 - : */ CharacterOther, | |
134 /* 59 - ; */ CharacterEndMediaQueryOrSupports, | |
135 /* 60 - < */ CharacterLess, | |
136 /* 61 - = */ CharacterOther, | |
137 /* 62 - > */ CharacterOther, | |
138 /* 63 - ? */ CharacterOther, | |
139 /* 64 - @ */ CharacterAt, | |
140 /* 65 - A */ CharacterIdentifierStart, | |
141 /* 66 - B */ CharacterIdentifierStart, | |
142 /* 67 - C */ CharacterIdentifierStart, | |
143 /* 68 - D */ CharacterIdentifierStart, | |
144 /* 69 - E */ CharacterIdentifierStart, | |
145 /* 70 - F */ CharacterIdentifierStart, | |
146 /* 71 - G */ CharacterIdentifierStart, | |
147 /* 72 - H */ CharacterIdentifierStart, | |
148 /* 73 - I */ CharacterIdentifierStart, | |
149 /* 74 - J */ CharacterIdentifierStart, | |
150 /* 75 - K */ CharacterIdentifierStart, | |
151 /* 76 - L */ CharacterIdentifierStart, | |
152 /* 77 - M */ CharacterIdentifierStart, | |
153 /* 78 - N */ CharacterIdentifierStart, | |
154 /* 79 - O */ CharacterIdentifierStart, | |
155 /* 80 - P */ CharacterIdentifierStart, | |
156 /* 81 - Q */ CharacterIdentifierStart, | |
157 /* 82 - R */ CharacterIdentifierStart, | |
158 /* 83 - S */ CharacterIdentifierStart, | |
159 /* 84 - T */ CharacterIdentifierStart, | |
160 /* 85 - U */ CharacterCaselessU, | |
161 /* 86 - V */ CharacterIdentifierStart, | |
162 /* 87 - W */ CharacterIdentifierStart, | |
163 /* 88 - X */ CharacterIdentifierStart, | |
164 /* 89 - Y */ CharacterIdentifierStart, | |
165 /* 90 - Z */ CharacterIdentifierStart, | |
166 /* 91 - [ */ CharacterOther, | |
167 /* 92 - \ */ CharacterBackSlash, | |
168 /* 93 - ] */ CharacterOther, | |
169 /* 94 - ^ */ CharacterXor, | |
170 /* 95 - _ */ CharacterIdentifierStart, | |
171 /* 96 - ` */ CharacterOther, | |
172 /* 97 - a */ CharacterIdentifierStart, | |
173 /* 98 - b */ CharacterIdentifierStart, | |
174 /* 99 - c */ CharacterIdentifierStart, | |
175 /* 100 - d */ CharacterIdentifierStart, | |
176 /* 101 - e */ CharacterIdentifierStart, | |
177 /* 102 - f */ CharacterIdentifierStart, | |
178 /* 103 - g */ CharacterIdentifierStart, | |
179 /* 104 - h */ CharacterIdentifierStart, | |
180 /* 105 - i */ CharacterIdentifierStart, | |
181 /* 106 - j */ CharacterIdentifierStart, | |
182 /* 107 - k */ CharacterIdentifierStart, | |
183 /* 108 - l */ CharacterIdentifierStart, | |
184 /* 109 - m */ CharacterIdentifierStart, | |
185 /* 110 - n */ CharacterIdentifierStart, | |
186 /* 111 - o */ CharacterIdentifierStart, | |
187 /* 112 - p */ CharacterIdentifierStart, | |
188 /* 113 - q */ CharacterIdentifierStart, | |
189 /* 114 - r */ CharacterIdentifierStart, | |
190 /* 115 - s */ CharacterIdentifierStart, | |
191 /* 116 - t */ CharacterIdentifierStart, | |
192 /* 117 - u */ CharacterCaselessU, | |
193 /* 118 - v */ CharacterIdentifierStart, | |
194 /* 119 - w */ CharacterIdentifierStart, | |
195 /* 120 - x */ CharacterIdentifierStart, | |
196 /* 121 - y */ CharacterIdentifierStart, | |
197 /* 122 - z */ CharacterIdentifierStart, | |
198 /* 123 - { */ CharacterEndMediaQueryOrSupports, | |
199 /* 124 - | */ CharacterVerticalBar, | |
200 /* 125 - } */ CharacterOther, | |
201 /* 126 - ~ */ CharacterTilde, | |
202 /* 127 - Delete */ CharacterOther, | |
203 }; | |
204 | |
205 // Utility functions for the CSS tokenizer. | |
206 | |
207 template <typename CharacterType> | |
208 static inline bool isCSSLetter(CharacterType character) | |
209 { | |
210 return character >= 128 || typesOfASCIICharacters[character] <= CharacterDas
h; | |
211 } | |
212 | |
// Returns true when |character| may follow a backslash in an escape: anything
// from space upwards except DEL (127).
template <typename CharacterType>
static inline bool isCSSEscape(CharacterType character)
{
    if (character < ' ')
        return false;
    return character != 127;
}
218 | |
// Returns true when |character| is accepted inside an unquoted URI:
// '!', '#' through '&', or anything from '*' upwards except DEL (127).
template <typename CharacterType>
static inline bool isURILetter(CharacterType character)
{
    if (character >= '*')
        return character != 127;
    if (character >= '#')
        return character <= '&';
    return character == '!';
}
224 | |
// Returns true when the text at |currentCharacter| can begin an identifier
// once any leading dash has been skipped: an ASCII letter, '_', a non-ASCII
// code unit, or a backslash followed by an escapable character.
template <typename CharacterType>
static inline bool isIdentifierStartAfterDash(CharacterType* currentCharacter)
{
    const CharacterType first = currentCharacter[0];
    if (isASCIIAlpha(first) || first == '_' || first >= 128)
        return true;
    // The second character is only inspected when the first is a backslash.
    return first == '\\' && isCSSEscape(currentCharacter[1]);
}
231 | |
232 template <typename CharacterType> | |
233 static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char*
constantString) | |
234 { | |
235 // Compare an character memory data with a zero terminated string. | |
236 do { | |
237 // The input must be part of an identifier if constantChar or constStrin
g | |
238 // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to
'-'. | |
239 ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantSt
ring == '-'); | |
240 ASSERT(*constantString != '-' || isCSSLetter(*cssString)); | |
241 if (toASCIILowerUnchecked(*cssString++) != (*constantString++)) | |
242 return false; | |
243 } while (*constantString); | |
244 return true; | |
245 } | |
246 | |
247 template <typename CharacterType> | |
248 static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, co
nst char* constantString) | |
249 { | |
250 ASSERT(*constantString); | |
251 | |
252 do { | |
253 if (*string++ != *constantString++) | |
254 return false; | |
255 } while (*constantString); | |
256 return true; | |
257 } | |
258 | |
259 template <typename CharacterType> | |
260 static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter) | |
261 { | |
262 // Returns with 0, if escape check is failed. Otherwise | |
263 // it returns with the following character. | |
264 ASSERT(*currentCharacter == '\\'); | |
265 | |
266 ++currentCharacter; | |
267 if (!isCSSEscape(*currentCharacter)) | |
268 return 0; | |
269 | |
270 if (isASCIIHexDigit(*currentCharacter)) { | |
271 int length = 6; | |
272 | |
273 do { | |
274 ++currentCharacter; | |
275 } while (isASCIIHexDigit(*currentCharacter) && --length); | |
276 | |
277 // Optional space after the escape sequence. | |
278 if (isHTMLSpace<CharacterType>(*currentCharacter)) | |
279 ++currentCharacter; | |
280 return currentCharacter; | |
281 } | |
282 return currentCharacter + 1; | |
283 } | |
284 | |
285 template <typename CharacterType> | |
286 static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter) | |
287 { | |
288 while (isHTMLSpace<CharacterType>(*currentCharacter)) | |
289 ++currentCharacter; | |
290 return currentCharacter; | |
291 } | |
292 | |
293 // Main CSS tokenizer functions. | |
294 | |
295 template <> | |
296 inline LChar*& CSSTokenizer::currentCharacter<LChar>() | |
297 { | |
298 return m_currentCharacter8; | |
299 } | |
300 | |
301 template <> | |
302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() | |
303 { | |
304 return m_currentCharacter16; | |
305 } | |
306 | |
307 UChar* CSSTokenizer::allocateStringBuffer16(size_t len) | |
308 { | |
309 // Allocates and returns a CSSTokenizer owned buffer for storing | |
310 // UTF-16 data. Used to get a suitable life span for UTF-16 | |
311 // strings, identifiers and URIs created by the tokenizer. | |
312 OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]); | |
313 | |
314 UChar* bufferPtr = buffer.get(); | |
315 | |
316 m_cssStrings16.append(buffer.release()); | |
317 return bufferPtr; | |
318 } | |
319 | |
320 template <> | |
321 inline LChar* CSSTokenizer::dataStart<LChar>() | |
322 { | |
323 return m_dataStart8.get(); | |
324 } | |
325 | |
326 template <> | |
327 inline UChar* CSSTokenizer::dataStart<UChar>() | |
328 { | |
329 return m_dataStart16.get(); | |
330 } | |
331 | |
332 template <typename CharacterType> | |
333 inline CSSParserLocation CSSTokenizer::tokenLocation() | |
334 { | |
335 CSSParserLocation location; | |
336 location.token.init(tokenStart<CharacterType>(), currentCharacter<CharacterT
ype>() - tokenStart<CharacterType>()); | |
337 location.lineNumber = m_tokenStartLineNumber; | |
338 location.offset = tokenStart<CharacterType>() - dataStart<CharacterType>(); | |
339 return location; | |
340 } | |
341 | |
342 CSSParserLocation CSSTokenizer::currentLocation() | |
343 { | |
344 if (is8BitSource()) | |
345 return tokenLocation<LChar>(); | |
346 return tokenLocation<UChar>(); | |
347 } | |
348 | |
349 template <typename CharacterType> | |
350 inline bool CSSTokenizer::isIdentifierStart() | |
351 { | |
352 // Check whether an identifier is started. | |
353 return isIdentifierStartAfterDash((*currentCharacter<CharacterType>() != '-'
) ? currentCharacter<CharacterType>() : currentCharacter<CharacterType>() + 1); | |
354 } | |
355 | |
// Tells checkAndSkipString() how to react to an invalid string.
enum CheckStringValidationMode {
    // Give up and report failure.
    AbortIfInvalid,
    // Step over the offending character and keep scanning.
    SkipInvalid
};
360 | |
361 template <typename CharacterType> | |
362 static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter,
int quote, CheckStringValidationMode mode) | |
363 { | |
364 // If mode is AbortIfInvalid and the string check fails it returns | |
365 // with 0. Otherwise it returns with a pointer to the first | |
366 // character after the string. | |
367 while (true) { | |
368 if (UNLIKELY(*currentCharacter == quote)) { | |
369 // String parsing is successful. | |
370 return currentCharacter + 1; | |
371 } | |
372 if (UNLIKELY(!*currentCharacter)) { | |
373 // String parsing is successful up to end of input. | |
374 return currentCharacter; | |
375 } | |
376 if (mode == AbortIfInvalid && UNLIKELY(*currentCharacter <= '\r' && (*cu
rrentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) { | |
377 // String parsing is failed for character '\n', '\f' or '\r'. | |
378 return 0; | |
379 } | |
380 | |
381 if (LIKELY(currentCharacter[0] != '\\')) { | |
382 ++currentCharacter; | |
383 } else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') { | |
384 currentCharacter += 2; | |
385 } else if (currentCharacter[1] == '\r') { | |
386 currentCharacter += currentCharacter[2] == '\n' ? 3 : 2; | |
387 } else { | |
388 CharacterType* next = checkAndSkipEscape(currentCharacter); | |
389 if (!next) { | |
390 if (mode == AbortIfInvalid) | |
391 return 0; | |
392 next = currentCharacter + 1; | |
393 } | |
394 currentCharacter = next; | |
395 } | |
396 } | |
397 } | |
398 | |
399 template <typename CharacterType> | |
400 unsigned CSSTokenizer::parseEscape(CharacterType*& src) | |
401 { | |
402 ASSERT(*src == '\\' && isCSSEscape(src[1])); | |
403 | |
404 unsigned unicode = 0; | |
405 | |
406 ++src; | |
407 if (isASCIIHexDigit(*src)) { | |
408 | |
409 int length = 6; | |
410 | |
411 do { | |
412 unicode = (unicode << 4) + toASCIIHexValue(*src++); | |
413 } while (--length && isASCIIHexDigit(*src)); | |
414 | |
415 // Characters above 0x10ffff are not handled. | |
416 if (unicode > 0x10ffff) | |
417 unicode = 0xfffd; | |
418 | |
419 // Optional space after the escape sequence. | |
420 if (isHTMLSpace<CharacterType>(*src)) | |
421 ++src; | |
422 | |
423 return unicode; | |
424 } | |
425 | |
426 return *src++; | |
427 } | |
428 | |
429 template <> | |
430 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode
) | |
431 { | |
432 ASSERT(unicode <= 0xff); | |
433 *result = unicode; | |
434 | |
435 ++result; | |
436 } | |
437 | |
438 template <> | |
439 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode
) | |
440 { | |
441 // Replace unicode with a surrogate pairs when it is bigger than 0xffff | |
442 if (U16_LENGTH(unicode) == 2) { | |
443 *result++ = U16_LEAD(unicode); | |
444 *result = U16_TRAIL(unicode); | |
445 } else { | |
446 *result = unicode; | |
447 } | |
448 | |
449 ++result; | |
450 } | |
451 | |
452 template <typename SrcCharacterType> | |
453 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src) | |
454 { | |
455 // The decoded form of an identifier (after resolving escape | |
456 // sequences) will not contain more characters (ASCII or UTF-16 | |
457 // codepoints) than the input. This code can therefore ignore | |
458 // escape sequences completely. | |
459 SrcCharacterType* start = src; | |
460 do { | |
461 if (LIKELY(*src != '\\')) | |
462 src++; | |
463 else | |
464 parseEscape<SrcCharacterType>(src); | |
465 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1]))); | |
466 | |
467 return src - start; | |
468 } | |
469 | |
470 template <typename SrcCharacterType, typename DestCharacterType> | |
471 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCh
aracterType*& result, bool& hasEscape) | |
472 { | |
473 hasEscape = false; | |
474 do { | |
475 if (LIKELY(*src != '\\')) { | |
476 *result++ = *src++; | |
477 } else { | |
478 hasEscape = true; | |
479 SrcCharacterType* savedEscapeStart = src; | |
480 unsigned unicode = parseEscape<SrcCharacterType>(src); | |
481 if (unicode > 0xff && sizeof(DestCharacterType) == 1) { | |
482 src = savedEscapeStart; | |
483 return false; | |
484 } | |
485 UnicodeToChars(result, unicode); | |
486 } | |
487 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1]))); | |
488 | |
489 return true; | |
490 } | |
491 | |
492 template <typename CharacterType> | |
493 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin
g& resultString, bool& hasEscape) | |
494 { | |
495 // If a valid identifier start is found, we can safely | |
496 // parse the identifier until the next invalid character. | |
497 ASSERT(isIdentifierStart<CharacterType>()); | |
498 | |
499 CharacterType* start = currentCharacter<CharacterType>(); | |
500 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res
ult, hasEscape))) { | |
501 // Found an escape we couldn't handle with 8 bits, copy what has been re
cognized and continue | |
502 ASSERT(is8BitSource()); | |
503 UChar* result16 = allocateStringBuffer16((result - start) + peekMaxIdent
ifierLen(currentCharacter<CharacterType>())); | |
504 UChar* start16 = result16; | |
505 int i = 0; | |
506 for (; i < result - start; i++) | |
507 result16[i] = start[i]; | |
508 | |
509 result16 += i; | |
510 | |
511 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has
Escape); | |
512 | |
513 resultString.init(start16, result16 - start16); | |
514 | |
515 return; | |
516 } | |
517 | |
518 resultString.init(start, result - start); | |
519 } | |
520 | |
521 template <typename SrcCharacterType> | |
522 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote) | |
523 { | |
524 // The decoded form of a CSS string (after resolving escape | |
525 // sequences) will not contain more characters (ASCII or UTF-16 | |
526 // codepoints) than the input. This code can therefore ignore | |
527 // escape sequences completely and just return the length of the | |
528 // input string (possibly including terminating quote if any). | |
529 SrcCharacterType* end = checkAndSkipString(src, quote, SkipInvalid); | |
530 return end ? end - src : 0; | |
531 } | |
532 | |
533 template <typename SrcCharacterType, typename DestCharacterType> | |
534 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharac
terType*& result, UChar quote) | |
535 { | |
536 while (true) { | |
537 if (UNLIKELY(*src == quote)) { | |
538 // String parsing is done. | |
539 ++src; | |
540 return true; | |
541 } | |
542 if (UNLIKELY(!*src)) { | |
543 // String parsing is done, but don't advance pointer if at the end o
f input. | |
544 return true; | |
545 } | |
546 if (LIKELY(src[0] != '\\')) { | |
547 *result++ = *src++; | |
548 } else if (src[1] == '\n' || src[1] == '\f') { | |
549 src += 2; | |
550 } else if (src[1] == '\r') { | |
551 src += src[2] == '\n' ? 3 : 2; | |
552 } else { | |
553 SrcCharacterType* savedEscapeStart = src; | |
554 unsigned unicode = parseEscape<SrcCharacterType>(src); | |
555 if (unicode > 0xff && sizeof(DestCharacterType) == 1) { | |
556 src = savedEscapeStart; | |
557 return false; | |
558 } | |
559 UnicodeToChars(result, unicode); | |
560 } | |
561 } | |
562 | |
563 return true; | |
564 } | |
565 | |
566 template <typename CharacterType> | |
567 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r
esultString, UChar quote) | |
568 { | |
569 CharacterType* start = currentCharacter<CharacterType>(); | |
570 | |
571 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result,
quote))) { | |
572 // Found an escape we couldn't handle with 8 bits, copy what has been re
cognized and continue | |
573 ASSERT(is8BitSource()); | |
574 UChar* result16 = allocateStringBuffer16((result - start) + peekMaxStrin
gLen(currentCharacter<CharacterType>(), quote)); | |
575 UChar* start16 = result16; | |
576 int i = 0; | |
577 for (; i < result - start; i++) | |
578 result16[i] = start[i]; | |
579 | |
580 result16 += i; | |
581 | |
582 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); | |
583 | |
584 resultString.init(start16, result16 - start16); | |
585 return; | |
586 } | |
587 | |
588 resultString.init(start, result - start); | |
589 } | |
590 | |
591 template <typename CharacterType> | |
592 inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UC
har& quote) | |
593 { | |
594 start = skipWhiteSpace(currentCharacter<CharacterType>()); | |
595 | |
596 if (*start == '"' || *start == '\'') { | |
597 quote = *start++; | |
598 end = checkAndSkipString(start, quote, AbortIfInvalid); | |
599 if (!end) | |
600 return false; | |
601 } else { | |
602 quote = 0; | |
603 end = start; | |
604 while (isURILetter(*end)) { | |
605 if (LIKELY(*end != '\\')) { | |
606 ++end; | |
607 } else { | |
608 end = checkAndSkipEscape(end); | |
609 if (!end) | |
610 return false; | |
611 } | |
612 } | |
613 } | |
614 | |
615 end = skipWhiteSpace(end); | |
616 if (*end != ')') | |
617 return false; | |
618 | |
619 return true; | |
620 } | |
621 | |
622 template <typename SrcCharacterType> | |
623 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote) | |
624 { | |
625 // The decoded form of a URI (after resolving escape sequences) | |
626 // will not contain more characters (ASCII or UTF-16 codepoints) | |
627 // than the input. This code can therefore ignore escape sequences | |
628 // completely. | |
629 SrcCharacterType* start = src; | |
630 if (quote) { | |
631 ASSERT(quote == '"' || quote == '\''); | |
632 return peekMaxStringLen(src, quote); | |
633 } | |
634 | |
635 while (isURILetter(*src)) { | |
636 if (LIKELY(*src != '\\')) | |
637 src++; | |
638 else | |
639 parseEscape<SrcCharacterType>(src); | |
640 } | |
641 | |
642 return src - start; | |
643 } | |
644 | |
645 template <typename SrcCharacterType, typename DestCharacterType> | |
646 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter
Type*& dest, UChar quote) | |
647 { | |
648 if (quote) { | |
649 ASSERT(quote == '"' || quote == '\''); | |
650 return parseStringInternal(src, dest, quote); | |
651 } | |
652 | |
653 while (isURILetter(*src)) { | |
654 if (LIKELY(*src != '\\')) { | |
655 *dest++ = *src++; | |
656 } else { | |
657 unsigned unicode = parseEscape<SrcCharacterType>(src); | |
658 if (unicode > 0xff && sizeof(DestCharacterType) == 1) | |
659 return false; | |
660 UnicodeToChars(dest, unicode); | |
661 } | |
662 } | |
663 | |
664 return true; | |
665 } | |
666 | |
667 template <typename CharacterType> | |
668 inline void CSSTokenizer::parseURI(CSSParserString& string) | |
669 { | |
670 CharacterType* uriStart; | |
671 CharacterType* uriEnd; | |
672 UChar quote; | |
673 if (!findURI(uriStart, uriEnd, quote)) | |
674 return; | |
675 | |
676 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; | |
677 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))
) { | |
678 string.init(uriStart, dest - uriStart); | |
679 } else { | |
680 // An escape sequence was encountered that can't be stored in 8 bits. | |
681 // Reset the current character to the start of the URI and re-parse with | |
682 // a 16-bit destination. | |
683 ASSERT(is8BitSource()); | |
684 currentCharacter<CharacterType>() = uriStart; | |
685 UChar* result16 = allocateStringBuffer16(peekMaxURILen(currentCharacter<
CharacterType>(), quote)); | |
686 UChar* uriStart16 = result16; | |
687 bool result = parseURIInternal(currentCharacter<CharacterType>(), result
16, quote); | |
688 ASSERT_UNUSED(result, result); | |
689 string.init(uriStart16, result16 - uriStart16); | |
690 } | |
691 | |
692 currentCharacter<CharacterType>() = uriEnd + 1; | |
693 m_token = URI; | |
694 } | |
695 | |
696 template <typename CharacterType> | |
697 inline bool CSSTokenizer::parseUnicodeRange() | |
698 { | |
699 CharacterType* character = currentCharacter<CharacterType>() + 1; | |
700 int length = 6; | |
701 ASSERT(*currentCharacter<CharacterType>() == '+'); | |
702 | |
703 while (isASCIIHexDigit(*character) && length) { | |
704 ++character; | |
705 --length; | |
706 } | |
707 | |
708 if (length && *character == '?') { | |
709 // At most 5 hex digit followed by a question mark. | |
710 do { | |
711 ++character; | |
712 --length; | |
713 } while (*character == '?' && length); | |
714 currentCharacter<CharacterType>() = character; | |
715 return true; | |
716 } | |
717 | |
718 if (length < 6) { | |
719 // At least one hex digit. | |
720 if (character[0] == '-' && isASCIIHexDigit(character[1])) { | |
721 // Followed by a dash and a hex digit. | |
722 ++character; | |
723 length = 6; | |
724 do { | |
725 ++character; | |
726 } while (--length && isASCIIHexDigit(*character)); | |
727 } | |
728 currentCharacter<CharacterType>() = character; | |
729 return true; | |
730 } | |
731 return false; | |
732 } | |
733 | |
734 template <typename CharacterType> | |
735 bool CSSTokenizer::parseNthChild() | |
736 { | |
737 CharacterType* character = currentCharacter<CharacterType>(); | |
738 | |
739 while (isASCIIDigit(*character)) | |
740 ++character; | |
741 if (isASCIIAlphaCaselessEqual(*character, 'n')) { | |
742 currentCharacter<CharacterType>() = character + 1; | |
743 return true; | |
744 } | |
745 return false; | |
746 } | |
747 | |
748 template <typename CharacterType> | |
749 bool CSSTokenizer::parseNthChildExtra() | |
750 { | |
751 CharacterType* character = skipWhiteSpace(currentCharacter<CharacterType>())
; | |
752 if (*character != '+' && *character != '-') | |
753 return false; | |
754 | |
755 character = skipWhiteSpace(character + 1); | |
756 if (!isASCIIDigit(*character)) | |
757 return false; | |
758 | |
759 do { | |
760 ++character; | |
761 } while (isASCIIDigit(*character)); | |
762 | |
763 currentCharacter<CharacterType>() = character; | |
764 return true; | |
765 } | |
766 | |
// Compares the first |length| characters of the current token (starting at
// tokenStart()) with a fixed list of function names. For "not", "url", "cue",
// "calc", "host" and "host-context" the matching token type is stored in
// m_token; for the four "nth-*" names m_parsingMode is switched to
// NthChildMode and m_token is left as it is. Returns true when a name
// matched, false otherwise.
// NOTE(review): SWITCH/CASE are not defined in this chunk; presumably a
// project-specific string-matching construct - confirm before restructuring.
767 template <typename CharacterType> | |
768 inline bool CSSTokenizer::detectFunctionTypeToken(int length) | |
769 { | |
770 ASSERT(length > 0); | |
771 CharacterType* name = tokenStart<CharacterType>(); | |
772 SWITCH(name, length) { | |
773 CASE("not") { | |
774 m_token = NOTFUNCTION; | |
775 return true; | |
776 } | |
777 CASE("url") { | |
778 m_token = URI; | |
779 return true; | |
780 } | |
781 CASE("cue") { | |
782 m_token = CUEFUNCTION; | |
783 return true; | |
784 } | |
785 CASE("calc") { | |
786 m_token = CALCFUNCTION; | |
787 return true; | |
788 } | |
789 CASE("host") { | |
790 m_token = HOSTFUNCTION; | |
791 return true; | |
792 } | |
793 CASE("host-context") { | |
794 m_token = HOSTCONTEXTFUNCTION; | |
795 return true; | |
796 } | |
797 CASE("nth-child") { | |
798 m_parsingMode = NthChildMode; | |
799 return true; | |
800 } | |
801 CASE("nth-of-type") { | |
802 m_parsingMode = NthChildMode; | |
803 return true; | |
804 } | |
805 CASE("nth-last-child") { | |
806 m_parsingMode = NthChildMode; | |
807 return true; | |
808 } | |
809 CASE("nth-last-of-type") { | |
810 m_parsingMode = NthChildMode; | |
811 return true; | |
812 } | |
813 } | |
814 return false; | |
815 } | |
816 | |
// Compares the first |length| characters of the current token (starting at
// tokenStart()) with the media query keywords "and", "not", "only" and "or",
// and stores the corresponding MEDIA_* token type in m_token on a match.
// Must only be called while m_parsingMode is MediaQueryMode (asserted).
// NOTE(review): SWITCH/CASE are not defined in this chunk; presumably each
// CASE is exclusive and m_token is left unchanged when nothing matches -
// confirm.
817 template <typename CharacterType> | |
818 inline void CSSTokenizer::detectMediaQueryToken(int length) | |
819 { | |
820 ASSERT(m_parsingMode == MediaQueryMode); | |
821 CharacterType* name = tokenStart<CharacterType>(); | |
822 | |
823 SWITCH(name, length) { | |
824 CASE("and") { | |
825 m_token = MEDIA_AND; | |
826 } | |
827 CASE("not") { | |
828 m_token = MEDIA_NOT; | |
829 } | |
830 CASE("only") { | |
831 m_token = MEDIA_ONLY; | |
832 } | |
833 CASE("or") { | |
834 m_token = MEDIA_OR; | |
835 } | |
836 } | |
837 } | |
838 | |
// Compares the |length| characters at |type| (the unit suffix that follows a
// number) with the known unit names and stores the corresponding token type
// in m_token on a match. |length| must be positive (asserted).
// NOTE(review): SWITCH/CASE are not defined in this chunk; presumably each
// CASE is exclusive and m_token is left unchanged when nothing matches -
// confirm.
839 template <typename CharacterType> | |
840 inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length) | |
841 { | |
842 ASSERT(length > 0); | |
843 | |
844 SWITCH(type, length) { | |
845 CASE("cm") { | |
846 m_token = CMS; | |
847 } | |
848 CASE("ch") { | |
849 m_token = CHS; | |
850 } | |
851 CASE("deg") { | |
852 m_token = DEGS; | |
853 } | |
854 CASE("dppx") { | |
855 // There is a discussion about the name of this unit on www-style: | |
856 // NOTE(review): this comment used to ask for a compile time guard to stay in place until that is resolved, but no guard is visible in this case - confirm. | |
857 // http://lists.w3.org/Archives/Public/www-style/2012May/0915.html | |
858 m_token = DPPX; | |
859 } | |
860 CASE("dpcm") { | |
861 m_token = DPCM; | |
862 } | |
863 CASE("dpi") { | |
864 m_token = DPI; | |
865 } | |
866 CASE("em") { | |
867 m_token = EMS; | |
868 } | |
869 CASE("ex") { | |
870 m_token = EXS; | |
871 } | |
872 CASE("fr") { | |
873 m_token = FR; | |
874 } | |
875 CASE("grad") { | |
876 m_token = GRADS; | |
877 } | |
878 CASE("hz") { | |
879 m_token = HERTZ; | |
880 } | |
881 CASE("in") { | |
882 m_token = INS; | |
883 } | |
884 CASE("khz") { | |
885 m_token = KHERTZ; | |
886 } | |
887 CASE("mm") { | |
888 m_token = MMS; | |
889 } | |
890 CASE("ms") { | |
891 m_token = MSECS; | |
892 } | |
893 CASE("px") { | |
894 m_token = PXS; | |
895 } | |
896 CASE("pt") { | |
897 m_token = PTS; | |
898 } | |
899 CASE("pc") { | |
900 m_token = PCS; | |
901 } | |
902 CASE("rad") { | |
903 m_token = RADS; | |
904 } | |
905 CASE("rem") { | |
906 m_token = REMS; | |
907 } | |
908 CASE("s") { | |
909 m_token = SECS; | |
910 } | |
911 CASE("turn") { | |
912 m_token = TURNS; | |
913 } | |
914 CASE("vw") { | |
915 m_token = VW; | |
916 } | |
917 CASE("vh") { | |
918 m_token = VH; | |
919 } | |
920 CASE("vmin") { | |
921 m_token = VMIN; | |
922 } | |
923 CASE("vmax") { | |
924 m_token = VMAX; | |
925 } | |
926 CASE("__qem") { | |
927 m_token = QEMS; | |
928 } | |
929 } | |
930 } | |
931 | |
// Handles a token whose first character is skipped as a leading dash: the
// remaining |length| - 1 characters are compared with "webkit-any" and
// "webkit-calc", and m_token is set to ANYFUNCTION or CALCFUNCTION on a match.
// NOTE(review): SWITCH/CASE are not defined in this chunk; presumably each
// CASE is exclusive and m_token is left unchanged when nothing matches -
// confirm.
932 template <typename CharacterType> | |
933 inline void CSSTokenizer::detectDashToken(int length) | |
934 { | |
935 CharacterType* name = tokenStart<CharacterType>(); | |
936 | |
937 // Ignore leading dash. | |
938 ++name; | |
939 --length; | |
940 | |
941 SWITCH(name, length) { | |
942 CASE("webkit-any") { | |
943 m_token = ANYFUNCTION; | |
944 } | |
945 CASE("webkit-calc") { | |
946 m_token = CALCFUNCTION; | |
947 } | |
948 } | |
949 } | |
950 | |
951 template <typename CharacterType> | |
952 inline void CSSTokenizer::detectAtToken(int length, bool hasEscape) | |
953 { | |
954 CharacterType* name = tokenStart<CharacterType>(); | |
955 ASSERT(name[0] == '@' && length >= 2); | |
956 | |
957 // Ignore leading @. | |
958 ++name; | |
959 --length; | |
960 | |
961 // charset, font-face, import, media, namespace, page, supports, | |
962 // -webkit-keyframes, keyframes, and -webkit-mediaquery are not affected by
hasEscape. | |
963 SWITCH(name, length) { | |
964 CASE("bottom-left") { | |
965 if (LIKELY(!hasEscape)) | |
966 m_token = BOTTOMLEFT_SYM; | |
967 } | |
968 CASE("bottom-right") { | |
969 if (LIKELY(!hasEscape)) | |
970 m_token = BOTTOMRIGHT_SYM; | |
971 } | |
972 CASE("bottom-center") { | |
973 if (LIKELY(!hasEscape)) | |
974 m_token = BOTTOMCENTER_SYM; | |
975 } | |
976 CASE("bottom-left-corner") { | |
977 if (LIKELY(!hasEscape)) | |
978 m_token = BOTTOMLEFTCORNER_SYM; | |
979 } | |
980 CASE("bottom-right-corner") { | |
981 if (LIKELY(!hasEscape)) | |
982 m_token = BOTTOMRIGHTCORNER_SYM; | |
983 } | |
984 CASE("charset") { | |
985 if (name - 1 == dataStart<CharacterType>()) | |
986 m_token = CHARSET_SYM; | |
987 } | |
988 CASE("font-face") { | |
989 m_token = FONT_FACE_SYM; | |
990 } | |
991 CASE("import") { | |
992 m_parsingMode = MediaQueryMode; | |
993 m_token = IMPORT_SYM; | |
994 } | |
995 CASE("keyframes") { | |
996 if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled()) | |
997 m_token = KEYFRAMES_SYM; | |
998 } | |
999 CASE("left-top") { | |
1000 if (LIKELY(!hasEscape)) | |
1001 m_token = LEFTTOP_SYM; | |
1002 } | |
1003 CASE("left-middle") { | |
1004 if (LIKELY(!hasEscape)) | |
1005 m_token = LEFTMIDDLE_SYM; | |
1006 } | |
1007 CASE("left-bottom") { | |
1008 if (LIKELY(!hasEscape)) | |
1009 m_token = LEFTBOTTOM_SYM; | |
1010 } | |
1011 CASE("media") { | |
1012 m_parsingMode = MediaQueryMode; | |
1013 m_token = MEDIA_SYM; | |
1014 } | |
1015 CASE("namespace") { | |
1016 m_token = NAMESPACE_SYM; | |
1017 } | |
1018 CASE("page") { | |
1019 m_token = PAGE_SYM; | |
1020 } | |
1021 CASE("right-top") { | |
1022 if (LIKELY(!hasEscape)) | |
1023 m_token = RIGHTTOP_SYM; | |
1024 } | |
1025 CASE("right-middle") { | |
1026 if (LIKELY(!hasEscape)) | |
1027 m_token = RIGHTMIDDLE_SYM; | |
1028 } | |
1029 CASE("right-bottom") { | |
1030 if (LIKELY(!hasEscape)) | |
1031 m_token = RIGHTBOTTOM_SYM; | |
1032 } | |
1033 CASE("supports") { | |
1034 m_parsingMode = SupportsMode; | |
1035 m_token = SUPPORTS_SYM; | |
1036 } | |
1037 CASE("top-left") { | |
1038 if (LIKELY(!hasEscape)) | |
1039 m_token = TOPLEFT_SYM; | |
1040 } | |
1041 CASE("top-right") { | |
1042 if (LIKELY(!hasEscape)) | |
1043 m_token = TOPRIGHT_SYM; | |
1044 } | |
1045 CASE("top-center") { | |
1046 if (LIKELY(!hasEscape)) | |
1047 m_token = TOPCENTER_SYM; | |
1048 } | |
1049 CASE("top-left-corner") { | |
1050 if (LIKELY(!hasEscape)) | |
1051 m_token = TOPLEFTCORNER_SYM; | |
1052 } | |
1053 CASE("top-right-corner") { | |
1054 if (LIKELY(!hasEscape)) | |
1055 m_token = TOPRIGHTCORNER_SYM; | |
1056 } | |
1057 CASE("viewport") { | |
1058 m_token = VIEWPORT_RULE_SYM; | |
1059 } | |
1060 CASE("-internal-rule") { | |
1061 if (LIKELY(!hasEscape && m_internal)) | |
1062 m_token = INTERNAL_RULE_SYM; | |
1063 } | |
1064 CASE("-internal-decls") { | |
1065 if (LIKELY(!hasEscape && m_internal)) | |
1066 m_token = INTERNAL_DECLS_SYM; | |
1067 } | |
1068 CASE("-internal-value") { | |
1069 if (LIKELY(!hasEscape && m_internal)) | |
1070 m_token = INTERNAL_VALUE_SYM; | |
1071 } | |
1072 CASE("-webkit-keyframes") { | |
1073 m_token = WEBKIT_KEYFRAMES_SYM; | |
1074 } | |
1075 CASE("-internal-selector") { | |
1076 if (LIKELY(!hasEscape && m_internal)) | |
1077 m_token = INTERNAL_SELECTOR_SYM; | |
1078 } | |
1079 CASE("-internal-medialist") { | |
1080 if (!m_internal) | |
1081 return; | |
1082 m_parsingMode = MediaQueryMode; | |
1083 m_token = INTERNAL_MEDIALIST_SYM; | |
1084 } | |
1085 CASE("-internal-keyframe-rule") { | |
1086 if (LIKELY(!hasEscape && m_internal)) | |
1087 m_token = INTERNAL_KEYFRAME_RULE_SYM; | |
1088 } | |
1089 CASE("-internal-keyframe-key-list") { | |
1090 if (!m_internal) | |
1091 return; | |
1092 m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM; | |
1093 } | |
1094 CASE("-internal-supports-condition") { | |
1095 if (!m_internal) | |
1096 return; | |
1097 m_parsingMode = SupportsMode; | |
1098 m_token = INTERNAL_SUPPORTS_CONDITION_SYM; | |
1099 } | |
1100 } | |
1101 } | |
1102 | |
1103 template <typename CharacterType> | |
1104 inline void CSSTokenizer::detectSupportsToken(int length) | |
1105 { | |
1106 ASSERT(m_parsingMode == SupportsMode); | |
1107 CharacterType* name = tokenStart<CharacterType>(); | |
1108 | |
1109 SWITCH(name, length) { | |
1110 CASE("or") { | |
1111 m_token = SUPPORTS_OR; | |
1112 } | |
1113 CASE("and") { | |
1114 m_token = SUPPORTS_AND; | |
1115 } | |
1116 CASE("not") { | |
1117 m_token = SUPPORTS_NOT; | |
1118 } | |
1119 } | |
1120 } | |
1121 | |
1122 template <typename SrcCharacterType> | |
1123 int CSSTokenizer::realLex(void* yylvalWithoutType) | |
1124 { | |
1125 YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType); | |
1126 // Write pointer for the next character. | |
1127 SrcCharacterType* result; | |
1128 CSSParserString resultString; | |
1129 bool hasEscape; | |
1130 | |
1131 // The input buffer is terminated by a \0 character, so | |
1132 // it is safe to read one character ahead of a known non-null. | |
1133 #if ENABLE(ASSERT) | |
1134 // In debug we check with an ASSERT that the length is > 0 for string types. | |
1135 yylval->string.clear(); | |
1136 #endif | |
1137 | |
1138 restartAfterComment: | |
1139 result = currentCharacter<SrcCharacterType>(); | |
1140 setTokenStart(result); | |
1141 m_tokenStartLineNumber = m_lineNumber; | |
1142 m_token = *currentCharacter<SrcCharacterType>(); | |
1143 ++currentCharacter<SrcCharacterType>(); | |
1144 | |
1145 switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdenti
fierStart) { | |
1146 case CharacterCaselessU: | |
1147 if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) { | |
1148 if (parseUnicodeRange<SrcCharacterType>()) { | |
1149 m_token = UNICODERANGE; | |
1150 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara
cter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
1151 break; | |
1152 } | |
1153 } | |
1154 // Fall through to CharacterIdentifierStart. | |
1155 | |
1156 case CharacterIdentifierStart: | |
1157 --currentCharacter<SrcCharacterType>(); | |
1158 parseIdentifier(result, yylval->string, hasEscape); | |
1159 m_token = IDENT; | |
1160 | |
1161 if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) { | |
1162 if (m_parsingMode == SupportsMode && !hasEscape) { | |
1163 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCha
racterType>()); | |
1164 if (m_token != IDENT) | |
1165 break; | |
1166 } | |
1167 | |
1168 m_token = FUNCTION; | |
1169 if (!hasEscape) | |
1170 detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<Sr
cCharacterType>()); | |
1171 | |
1172 // Skip parenthesis | |
1173 ++currentCharacter<SrcCharacterType>(); | |
1174 ++result; | |
1175 ++yylval->string.m_length; | |
1176 | |
1177 if (m_token == URI) { | |
1178 m_token = FUNCTION; | |
1179 // Check whether it is really an URI. | |
1180 if (yylval->string.is8Bit()) | |
1181 parseURI<LChar>(yylval->string); | |
1182 else | |
1183 parseURI<UChar>(yylval->string); | |
1184 } | |
1185 } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) { | |
1186 if (m_parsingMode == MediaQueryMode) { | |
1187 detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcC
haracterType>()); | |
1188 } else if (m_parsingMode == SupportsMode) { | |
1189 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCha
racterType>()); | |
1190 } else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqua
l(tokenStart<SrcCharacterType>()[0], 'n')) { | |
1191 if (result - tokenStart<SrcCharacterType>() == 1) { | |
1192 // String "n" is IDENT but "n+1" is NTH. | |
1193 if (parseNthChildExtra<SrcCharacterType>()) { | |
1194 m_token = NTH; | |
1195 yylval->string.m_length = currentCharacter<SrcCharacterT
ype>() - tokenStart<SrcCharacterType>(); | |
1196 } | |
1197 } else if (result - tokenStart<SrcCharacterType>() >= 2 && token
Start<SrcCharacterType>()[1] == '-') { | |
1198 // String "n-" is IDENT but "n-1" is NTH. | |
1199 // Set currentCharacter to '-' to continue parsing. | |
1200 SrcCharacterType* nextCharacter = result; | |
1201 currentCharacter<SrcCharacterType>() = tokenStart<SrcCharact
erType>() + 1; | |
1202 if (parseNthChildExtra<SrcCharacterType>()) { | |
1203 m_token = NTH; | |
1204 yylval->string.setLength(currentCharacter<SrcCharacterTy
pe>() - tokenStart<SrcCharacterType>()); | |
1205 } else { | |
1206 // Revert the change to currentCharacter if unsuccessful
. | |
1207 currentCharacter<SrcCharacterType>() = nextCharacter; | |
1208 } | |
1209 } | |
1210 } | |
1211 } | |
1212 break; | |
1213 | |
1214 case CharacterDot: | |
1215 if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) | |
1216 break; | |
1217 // Fall through to CharacterNumber. | |
1218 | |
1219 case CharacterNumber: { | |
1220 bool dotSeen = (m_token == '.'); | |
1221 | |
1222 while (true) { | |
1223 if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) { | |
1224 // Only one dot is allowed for a number, | |
1225 // and it must be followed by a digit. | |
1226 if (currentCharacter<SrcCharacterType>()[0] != '.' || dotSeen ||
!isASCIIDigit(currentCharacter<SrcCharacterType>()[1])) | |
1227 break; | |
1228 dotSeen = true; | |
1229 } | |
1230 ++currentCharacter<SrcCharacterType>(); | |
1231 } | |
1232 | |
1233 if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaC
aselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) { | |
1234 // "[0-9]+n" is always an NthChild. | |
1235 ++currentCharacter<SrcCharacterType>(); | |
1236 parseNthChildExtra<SrcCharacterType>(); | |
1237 m_token = NTH; | |
1238 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter
<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
1239 break; | |
1240 } | |
1241 | |
1242 // Use SVG parser for numbers on SVG presentation attributes. | |
1243 if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) { | |
1244 // We need to take care of units like 'em' or 'ex'. | |
1245 SrcCharacterType* character = currentCharacter<SrcCharacterType>(); | |
1246 if (isASCIIAlphaCaselessEqual(*character, 'e')) { | |
1247 ASSERT(character - tokenStart<SrcCharacterType>() > 0); | |
1248 ++character; | |
1249 if (*character == '-' || *character == '+' || isASCIIDigit(*char
acter)) { | |
1250 ++character; | |
1251 while (isASCIIDigit(*character)) | |
1252 ++character; | |
1253 // Use FLOATTOKEN if the string contains exponents. | |
1254 dotSeen = true; | |
1255 currentCharacter<SrcCharacterType>() = character; | |
1256 } | |
1257 } | |
1258 if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - toke
nStart<SrcCharacterType>(), yylval->number)) | |
1259 break; | |
1260 } else { | |
1261 yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(),
currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
1262 } | |
1263 | |
1264 // Type of the function. | |
1265 if (isIdentifierStart<SrcCharacterType>()) { | |
1266 SrcCharacterType* type = currentCharacter<SrcCharacterType>(); | |
1267 result = currentCharacter<SrcCharacterType>(); | |
1268 | |
1269 parseIdentifier(result, resultString, hasEscape); | |
1270 | |
1271 m_token = DIMEN; | |
1272 if (!hasEscape) | |
1273 detectNumberToken(type, currentCharacter<SrcCharacterType>() - t
ype); | |
1274 | |
1275 if (m_token == DIMEN) { | |
1276 // The decoded number is overwritten, but this is intentional. | |
1277 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara
cter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
1278 } | |
1279 } else if (*currentCharacter<SrcCharacterType>() == '%') { | |
1280 // Although the CSS grammar says {num}% we follow | |
1281 // webkit at the moment which uses {num}%+. | |
1282 do { | |
1283 ++currentCharacter<SrcCharacterType>(); | |
1284 } while (*currentCharacter<SrcCharacterType>() == '%'); | |
1285 m_token = PERCENTAGE; | |
1286 } else { | |
1287 m_token = dotSeen ? FLOATTOKEN : INTEGER; | |
1288 } | |
1289 break; | |
1290 } | |
1291 | |
1292 case CharacterDash: | |
1293 if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) { | |
1294 --currentCharacter<SrcCharacterType>(); | |
1295 parseIdentifier(result, resultString, hasEscape); | |
1296 m_token = IDENT; | |
1297 | |
1298 if (*currentCharacter<SrcCharacterType>() == '(') { | |
1299 m_token = FUNCTION; | |
1300 if (!hasEscape) | |
1301 detectDashToken<SrcCharacterType>(result - tokenStart<SrcCha
racterType>()); | |
1302 ++currentCharacter<SrcCharacterType>(); | |
1303 ++result; | |
1304 } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape &&
isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) { | |
1305 if (result - tokenStart<SrcCharacterType>() == 2) { | |
1306 // String "-n" is IDENT but "-n+1" is NTH. | |
1307 if (parseNthChildExtra<SrcCharacterType>()) { | |
1308 m_token = NTH; | |
1309 result = currentCharacter<SrcCharacterType>(); | |
1310 } | |
1311 } else if (result - tokenStart<SrcCharacterType>() >= 3 && token
Start<SrcCharacterType>()[2] == '-') { | |
1312 // String "-n-" is IDENT but "-n-1" is NTH. | |
1313 // Set currentCharacter to second '-' of '-n-' to continue p
arsing. | |
1314 SrcCharacterType* nextCharacter = result; | |
1315 currentCharacter<SrcCharacterType>() = tokenStart<SrcCharact
erType>() + 2; | |
1316 if (parseNthChildExtra<SrcCharacterType>()) { | |
1317 m_token = NTH; | |
1318 result = currentCharacter<SrcCharacterType>(); | |
1319 } else { | |
1320 // Revert the change to currentCharacter if unsuccessful
. | |
1321 currentCharacter<SrcCharacterType>() = nextCharacter; | |
1322 } | |
1323 } | |
1324 } | |
1325 resultString.setLength(result - tokenStart<SrcCharacterType>()); | |
1326 yylval->string = resultString; | |
1327 } else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentChar
acter<SrcCharacterType>()[1] == '>') { | |
1328 currentCharacter<SrcCharacterType>() += 2; | |
1329 m_token = SGML_CD; | |
1330 } else if (UNLIKELY(m_parsingMode == NthChildMode)) { | |
1331 // "-[0-9]+n" is always an NthChild. | |
1332 if (parseNthChild<SrcCharacterType>()) { | |
1333 parseNthChildExtra<SrcCharacterType>(); | |
1334 m_token = NTH; | |
1335 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara
cter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
1336 } | |
1337 } | |
1338 break; | |
1339 | |
1340 case CharacterOther: | |
1341 // m_token is simply the current character. | |
1342 break; | |
1343 | |
1344 case CharacterNull: | |
1345 // Do not advance pointer at the end of input. | |
1346 --currentCharacter<SrcCharacterType>(); | |
1347 break; | |
1348 | |
1349 case CharacterWhiteSpace: | |
1350 m_token = WHITESPACE; | |
1351 // Might start with a '\n'. | |
1352 --currentCharacter<SrcCharacterType>(); | |
1353 do { | |
1354 if (*currentCharacter<SrcCharacterType>() == '\n') | |
1355 ++m_lineNumber; | |
1356 ++currentCharacter<SrcCharacterType>(); | |
1357 } while (*currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICh
aracters[*currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace)); | |
1358 break; | |
1359 | |
1360 case CharacterEndMediaQueryOrSupports: | |
1361 if (m_parsingMode == MediaQueryMode || m_parsingMode == SupportsMode) | |
1362 m_parsingMode = NormalMode; | |
1363 break; | |
1364 | |
1365 case CharacterEndNthChild: | |
1366 if (m_parsingMode == NthChildMode) | |
1367 m_parsingMode = NormalMode; | |
1368 break; | |
1369 | |
1370 case CharacterQuote: | |
1371 if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token, Ab
ortIfInvalid)) { | |
1372 ++result; | |
1373 parseString<SrcCharacterType>(result, yylval->string, m_token); | |
1374 m_token = STRING; | |
1375 } | |
1376 break; | |
1377 | |
1378 case CharacterExclamationMark: { | |
1379 SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterTy
pe>()); | |
1380 if (isEqualToCSSIdentifier(start, "important")) { | |
1381 m_token = IMPORTANT_SYM; | |
1382 currentCharacter<SrcCharacterType>() = start + 9; | |
1383 } | |
1384 break; | |
1385 } | |
1386 | |
1387 case CharacterHashmark: { | |
1388 SrcCharacterType* start = currentCharacter<SrcCharacterType>(); | |
1389 result = currentCharacter<SrcCharacterType>(); | |
1390 | |
1391 if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) { | |
1392 // This must be a valid hex number token. | |
1393 do { | |
1394 ++currentCharacter<SrcCharacterType>(); | |
1395 } while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>())); | |
1396 m_token = HEX; | |
1397 yylval->string.init(start, currentCharacter<SrcCharacterType>() - st
art); | |
1398 } else if (isIdentifierStart<SrcCharacterType>()) { | |
1399 m_token = IDSEL; | |
1400 parseIdentifier(result, yylval->string, hasEscape); | |
1401 if (!hasEscape) { | |
1402 // Check whether the identifier is also a valid hex number. | |
1403 SrcCharacterType* current = start; | |
1404 m_token = HEX; | |
1405 do { | |
1406 if (!isASCIIHexDigit(*current)) { | |
1407 m_token = IDSEL; | |
1408 break; | |
1409 } | |
1410 ++current; | |
1411 } while (current < result); | |
1412 } | |
1413 } | |
1414 break; | |
1415 } | |
1416 | |
1417 case CharacterSlash: | |
1418 // Ignore comments. They are not even considered as white spaces. | |
1419 if (*currentCharacter<SrcCharacterType>() == '*') { | |
1420 const CSSParserLocation startLocation = currentLocation(); | |
1421 if (m_parser.m_observer) { | |
1422 unsigned startOffset = currentCharacter<SrcCharacterType>() - da
taStart<SrcCharacterType>() - 1; // Start with a slash. | |
1423 m_parser.m_observer->startComment(startOffset - m_parsedTextPref
ixLength); | |
1424 } | |
1425 ++currentCharacter<SrcCharacterType>(); | |
1426 while (currentCharacter<SrcCharacterType>()[0] != '*' || currentChar
acter<SrcCharacterType>()[1] != '/') { | |
1427 if (*currentCharacter<SrcCharacterType>() == '\n') | |
1428 ++m_lineNumber; | |
1429 if (*currentCharacter<SrcCharacterType>() == '\0') { | |
1430 // Unterminated comments are simply ignored. | |
1431 currentCharacter<SrcCharacterType>() -= 2; | |
1432 m_parser.reportError(startLocation, UnterminatedCommentCSSEr
ror); | |
1433 break; | |
1434 } | |
1435 ++currentCharacter<SrcCharacterType>(); | |
1436 } | |
1437 currentCharacter<SrcCharacterType>() += 2; | |
1438 if (m_parser.m_observer) { | |
1439 unsigned endOffset = currentCharacter<SrcCharacterType>() - data
Start<SrcCharacterType>(); | |
1440 unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1
- m_parsedTextSuffixLength); | |
1441 m_parser.m_observer->endComment(std::min(endOffset, userTextEndO
ffset) - m_parsedTextPrefixLength); | |
1442 } | |
1443 goto restartAfterComment; | |
1444 } | |
1445 break; | |
1446 | |
1447 case CharacterDollar: | |
1448 if (*currentCharacter<SrcCharacterType>() == '=') { | |
1449 ++currentCharacter<SrcCharacterType>(); | |
1450 m_token = ENDSWITH; | |
1451 } | |
1452 break; | |
1453 | |
1454 case CharacterAsterisk: | |
1455 if (*currentCharacter<SrcCharacterType>() == '=') { | |
1456 ++currentCharacter<SrcCharacterType>(); | |
1457 m_token = CONTAINS; | |
1458 } | |
1459 break; | |
1460 | |
1461 case CharacterPlus: | |
1462 if (UNLIKELY(m_parsingMode == NthChildMode)) { | |
1463 // Simplest case. "+[0-9]*n" is always NthChild. | |
1464 if (parseNthChild<SrcCharacterType>()) { | |
1465 parseNthChildExtra<SrcCharacterType>(); | |
1466 m_token = NTH; | |
1467 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara
cter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
1468 } | |
1469 } | |
1470 break; | |
1471 | |
1472 case CharacterLess: | |
1473 if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<S
rcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-')
{ | |
1474 currentCharacter<SrcCharacterType>() += 3; | |
1475 m_token = SGML_CD; | |
1476 } | |
1477 break; | |
1478 | |
1479 case CharacterAt: | |
1480 if (isIdentifierStart<SrcCharacterType>()) { | |
1481 m_token = ATKEYWORD; | |
1482 ++result; | |
1483 parseIdentifier(result, resultString, hasEscape); | |
1484 // The standard enables unicode escapes in at-rules. In this case on
ly the resultString will contain the | |
1485 // correct identifier, hence we have to use it to determine its leng
th instead of the usual pointer arithmetic. | |
1486 detectAtToken<SrcCharacterType>(resultString.length() + 1, hasEscape
); | |
1487 } | |
1488 break; | |
1489 | |
1490 case CharacterBackSlash: | |
1491 if (isCSSEscape(*currentCharacter<SrcCharacterType>())) { | |
1492 --currentCharacter<SrcCharacterType>(); | |
1493 parseIdentifier(result, yylval->string, hasEscape); | |
1494 m_token = IDENT; | |
1495 } | |
1496 break; | |
1497 | |
1498 case CharacterXor: | |
1499 if (*currentCharacter<SrcCharacterType>() == '=') { | |
1500 ++currentCharacter<SrcCharacterType>(); | |
1501 m_token = BEGINSWITH; | |
1502 } | |
1503 break; | |
1504 | |
1505 case CharacterVerticalBar: | |
1506 if (*currentCharacter<SrcCharacterType>() == '=') { | |
1507 ++currentCharacter<SrcCharacterType>(); | |
1508 m_token = DASHMATCH; | |
1509 } | |
1510 break; | |
1511 | |
1512 case CharacterTilde: | |
1513 if (*currentCharacter<SrcCharacterType>() == '=') { | |
1514 ++currentCharacter<SrcCharacterType>(); | |
1515 m_token = INCLUDES; | |
1516 } | |
1517 break; | |
1518 | |
1519 default: | |
1520 ASSERT_NOT_REACHED(); | |
1521 break; | |
1522 } | |
1523 | |
1524 return m_token; | |
1525 } | |
1526 | |
1527 template <> | |
1528 inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart) | |
1529 { | |
1530 m_tokenStart.ptr8 = tokenStart; | |
1531 } | |
1532 | |
1533 template <> | |
1534 inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart) | |
1535 { | |
1536 m_tokenStart.ptr16 = tokenStart; | |
1537 } | |
1538 | |
1539 void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, con
st String& string, const char* suffix, unsigned suffixLength) | |
1540 { | |
1541 m_parsedTextPrefixLength = prefixLength; | |
1542 m_parsedTextSuffixLength = suffixLength; | |
1543 unsigned stringLength = string.length(); | |
1544 unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuff
ixLength + 1; | |
1545 m_length = length; | |
1546 | |
1547 if (!stringLength || string.is8Bit()) { | |
1548 m_dataStart8 = adoptArrayPtr(new LChar[length]); | |
1549 for (unsigned i = 0; i < m_parsedTextPrefixLength; i++) | |
1550 m_dataStart8[i] = prefix[i]; | |
1551 | |
1552 if (stringLength) | |
1553 memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.charact
ers8(), stringLength * sizeof(LChar)); | |
1554 | |
1555 unsigned start = m_parsedTextPrefixLength + stringLength; | |
1556 unsigned end = start + suffixLength; | |
1557 for (unsigned i = start; i < end; i++) | |
1558 m_dataStart8[i] = suffix[i - start]; | |
1559 | |
1560 m_dataStart8[length - 1] = 0; | |
1561 | |
1562 m_is8BitSource = true; | |
1563 m_currentCharacter8 = m_dataStart8.get(); | |
1564 m_currentCharacter16 = 0; | |
1565 setTokenStart<LChar>(m_currentCharacter8); | |
1566 m_lexFunc = &CSSTokenizer::realLex<LChar>; | |
1567 return; | |
1568 } | |
1569 | |
1570 m_dataStart16 = adoptArrayPtr(new UChar[length]); | |
1571 for (unsigned i = 0; i < m_parsedTextPrefixLength; i++) | |
1572 m_dataStart16[i] = prefix[i]; | |
1573 | |
1574 ASSERT(stringLength); | |
1575 memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16()
, stringLength * sizeof(UChar)); | |
1576 | |
1577 unsigned start = m_parsedTextPrefixLength + stringLength; | |
1578 unsigned end = start + suffixLength; | |
1579 for (unsigned i = start; i < end; i++) | |
1580 m_dataStart16[i] = suffix[i - start]; | |
1581 | |
1582 m_dataStart16[length - 1] = 0; | |
1583 | |
1584 m_is8BitSource = false; | |
1585 m_currentCharacter8 = 0; | |
1586 m_currentCharacter16 = m_dataStart16.get(); | |
1587 setTokenStart<UChar>(m_currentCharacter16); | |
1588 m_lexFunc = &CSSTokenizer::realLex<UChar>; | |
1589 } | |
1590 | |
1591 } // namespace blink | |
OLD | NEW |