| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org) | |
| 3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com) | |
| 4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc.
All rights reserved. | |
| 5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com> | |
| 6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org> | |
| 7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmo
bile.com/) | |
| 8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. | |
| 9 * Copyright (C) 2012 Intel Corporation. All rights reserved. | |
| 10 * | |
| 11 * This library is free software; you can redistribute it and/or | |
| 12 * modify it under the terms of the GNU Library General Public | |
| 13 * License as published by the Free Software Foundation; either | |
| 14 * version 2 of the License, or (at your option) any later version. | |
| 15 * | |
| 16 * This library is distributed in the hope that it will be useful, | |
| 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 19 * Library General Public License for more details. | |
| 20 * | |
| 21 * You should have received a copy of the GNU Library General Public License | |
| 22 * along with this library; see the file COPYING.LIB. If not, write to | |
| 23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
| 24 * Boston, MA 02110-1301, USA. | |
| 25 */ | |
| 26 | |
| 27 #include "config.h" | |
| 28 #include "core/css/CSSTokenizer.h" | |
| 29 | |
| 30 #include "core/css/CSSKeyframeRule.h" | |
| 31 #include "core/css/parser/BisonCSSParser.h" | |
| 32 #include "core/css/CSSParserValues.h" | |
| 33 #include "core/css/MediaQuery.h" | |
| 34 #include "core/css/StyleRule.h" | |
| 35 #include "core/html/parser/HTMLParserIdioms.h" | |
| 36 #include "core/svg/SVGParserUtilities.h" | |
| 37 | |
| 38 namespace blink { | |
| 39 | |
| 40 #include "core/CSSGrammar.h" | |
| 41 | |
// Lexical class of an ASCII character, used by the tokenizer's main switch.
enum CharacterType {
    // Characters that may appear in an identifier. These four values must
    // remain first and contiguous: identifier membership is tested with
    // "type <= CharacterDash".
    CharacterCaselessU = 0,
    CharacterIdentifierStart,
    CharacterNumber,
    CharacterDash,

    // All remaining classes.
    CharacterOther,
    CharacterNull,
    CharacterWhiteSpace,
    CharacterEndMediaQueryOrSupports,
    CharacterEndNthChild,
    CharacterQuote,
    CharacterExclamationMark,
    CharacterHashmark,
    CharacterDollar,
    CharacterAsterisk,
    CharacterPlus,
    CharacterDot,
    CharacterSlash,
    CharacterLess,
    CharacterAt,
    CharacterBackSlash,
    CharacterXor,
    CharacterVerticalBar,
    CharacterTilde,
};
| 72 | |
| 73 // 128 ASCII codes | |
| 74 static const CharacterType typesOfASCIICharacters[128] = { | |
| 75 /* 0 - Null */ CharacterNull, | |
| 76 /* 1 - Start of Heading */ CharacterOther, | |
| 77 /* 2 - Start of Text */ CharacterOther, | |
| 78 /* 3 - End of Text */ CharacterOther, | |
| 79 /* 4 - End of Transm. */ CharacterOther, | |
| 80 /* 5 - Enquiry */ CharacterOther, | |
| 81 /* 6 - Acknowledgment */ CharacterOther, | |
| 82 /* 7 - Bell */ CharacterOther, | |
| 83 /* 8 - Back Space */ CharacterOther, | |
| 84 /* 9 - Horizontal Tab */ CharacterWhiteSpace, | |
| 85 /* 10 - Line Feed */ CharacterWhiteSpace, | |
| 86 /* 11 - Vertical Tab */ CharacterOther, | |
| 87 /* 12 - Form Feed */ CharacterWhiteSpace, | |
| 88 /* 13 - Carriage Return */ CharacterWhiteSpace, | |
| 89 /* 14 - Shift Out */ CharacterOther, | |
| 90 /* 15 - Shift In */ CharacterOther, | |
| 91 /* 16 - Data Line Escape */ CharacterOther, | |
| 92 /* 17 - Device Control 1 */ CharacterOther, | |
| 93 /* 18 - Device Control 2 */ CharacterOther, | |
| 94 /* 19 - Device Control 3 */ CharacterOther, | |
| 95 /* 20 - Device Control 4 */ CharacterOther, | |
| 96 /* 21 - Negative Ack. */ CharacterOther, | |
| 97 /* 22 - Synchronous Idle */ CharacterOther, | |
| 98 /* 23 - End of Transmit */ CharacterOther, | |
| 99 /* 24 - Cancel */ CharacterOther, | |
| 100 /* 25 - End of Medium */ CharacterOther, | |
| 101 /* 26 - Substitute */ CharacterOther, | |
| 102 /* 27 - Escape */ CharacterOther, | |
| 103 /* 28 - File Separator */ CharacterOther, | |
| 104 /* 29 - Group Separator */ CharacterOther, | |
| 105 /* 30 - Record Separator */ CharacterOther, | |
| 106 /* 31 - Unit Separator */ CharacterOther, | |
| 107 /* 32 - Space */ CharacterWhiteSpace, | |
| 108 /* 33 - ! */ CharacterExclamationMark, | |
| 109 /* 34 - " */ CharacterQuote, | |
| 110 /* 35 - # */ CharacterHashmark, | |
| 111 /* 36 - $ */ CharacterDollar, | |
| 112 /* 37 - % */ CharacterOther, | |
| 113 /* 38 - & */ CharacterOther, | |
| 114 /* 39 - ' */ CharacterQuote, | |
| 115 /* 40 - ( */ CharacterOther, | |
| 116 /* 41 - ) */ CharacterEndNthChild, | |
| 117 /* 42 - * */ CharacterAsterisk, | |
| 118 /* 43 - + */ CharacterPlus, | |
| 119 /* 44 - , */ CharacterOther, | |
| 120 /* 45 - - */ CharacterDash, | |
| 121 /* 46 - . */ CharacterDot, | |
| 122 /* 47 - / */ CharacterSlash, | |
| 123 /* 48 - 0 */ CharacterNumber, | |
| 124 /* 49 - 1 */ CharacterNumber, | |
| 125 /* 50 - 2 */ CharacterNumber, | |
| 126 /* 51 - 3 */ CharacterNumber, | |
| 127 /* 52 - 4 */ CharacterNumber, | |
| 128 /* 53 - 5 */ CharacterNumber, | |
| 129 /* 54 - 6 */ CharacterNumber, | |
| 130 /* 55 - 7 */ CharacterNumber, | |
| 131 /* 56 - 8 */ CharacterNumber, | |
| 132 /* 57 - 9 */ CharacterNumber, | |
| 133 /* 58 - : */ CharacterOther, | |
| 134 /* 59 - ; */ CharacterEndMediaQueryOrSupports, | |
| 135 /* 60 - < */ CharacterLess, | |
| 136 /* 61 - = */ CharacterOther, | |
| 137 /* 62 - > */ CharacterOther, | |
| 138 /* 63 - ? */ CharacterOther, | |
| 139 /* 64 - @ */ CharacterAt, | |
| 140 /* 65 - A */ CharacterIdentifierStart, | |
| 141 /* 66 - B */ CharacterIdentifierStart, | |
| 142 /* 67 - C */ CharacterIdentifierStart, | |
| 143 /* 68 - D */ CharacterIdentifierStart, | |
| 144 /* 69 - E */ CharacterIdentifierStart, | |
| 145 /* 70 - F */ CharacterIdentifierStart, | |
| 146 /* 71 - G */ CharacterIdentifierStart, | |
| 147 /* 72 - H */ CharacterIdentifierStart, | |
| 148 /* 73 - I */ CharacterIdentifierStart, | |
| 149 /* 74 - J */ CharacterIdentifierStart, | |
| 150 /* 75 - K */ CharacterIdentifierStart, | |
| 151 /* 76 - L */ CharacterIdentifierStart, | |
| 152 /* 77 - M */ CharacterIdentifierStart, | |
| 153 /* 78 - N */ CharacterIdentifierStart, | |
| 154 /* 79 - O */ CharacterIdentifierStart, | |
| 155 /* 80 - P */ CharacterIdentifierStart, | |
| 156 /* 81 - Q */ CharacterIdentifierStart, | |
| 157 /* 82 - R */ CharacterIdentifierStart, | |
| 158 /* 83 - S */ CharacterIdentifierStart, | |
| 159 /* 84 - T */ CharacterIdentifierStart, | |
| 160 /* 85 - U */ CharacterCaselessU, | |
| 161 /* 86 - V */ CharacterIdentifierStart, | |
| 162 /* 87 - W */ CharacterIdentifierStart, | |
| 163 /* 88 - X */ CharacterIdentifierStart, | |
| 164 /* 89 - Y */ CharacterIdentifierStart, | |
| 165 /* 90 - Z */ CharacterIdentifierStart, | |
| 166 /* 91 - [ */ CharacterOther, | |
| 167 /* 92 - \ */ CharacterBackSlash, | |
| 168 /* 93 - ] */ CharacterOther, | |
| 169 /* 94 - ^ */ CharacterXor, | |
| 170 /* 95 - _ */ CharacterIdentifierStart, | |
| 171 /* 96 - ` */ CharacterOther, | |
| 172 /* 97 - a */ CharacterIdentifierStart, | |
| 173 /* 98 - b */ CharacterIdentifierStart, | |
| 174 /* 99 - c */ CharacterIdentifierStart, | |
| 175 /* 100 - d */ CharacterIdentifierStart, | |
| 176 /* 101 - e */ CharacterIdentifierStart, | |
| 177 /* 102 - f */ CharacterIdentifierStart, | |
| 178 /* 103 - g */ CharacterIdentifierStart, | |
| 179 /* 104 - h */ CharacterIdentifierStart, | |
| 180 /* 105 - i */ CharacterIdentifierStart, | |
| 181 /* 106 - j */ CharacterIdentifierStart, | |
| 182 /* 107 - k */ CharacterIdentifierStart, | |
| 183 /* 108 - l */ CharacterIdentifierStart, | |
| 184 /* 109 - m */ CharacterIdentifierStart, | |
| 185 /* 110 - n */ CharacterIdentifierStart, | |
| 186 /* 111 - o */ CharacterIdentifierStart, | |
| 187 /* 112 - p */ CharacterIdentifierStart, | |
| 188 /* 113 - q */ CharacterIdentifierStart, | |
| 189 /* 114 - r */ CharacterIdentifierStart, | |
| 190 /* 115 - s */ CharacterIdentifierStart, | |
| 191 /* 116 - t */ CharacterIdentifierStart, | |
| 192 /* 117 - u */ CharacterCaselessU, | |
| 193 /* 118 - v */ CharacterIdentifierStart, | |
| 194 /* 119 - w */ CharacterIdentifierStart, | |
| 195 /* 120 - x */ CharacterIdentifierStart, | |
| 196 /* 121 - y */ CharacterIdentifierStart, | |
| 197 /* 122 - z */ CharacterIdentifierStart, | |
| 198 /* 123 - { */ CharacterEndMediaQueryOrSupports, | |
| 199 /* 124 - | */ CharacterVerticalBar, | |
| 200 /* 125 - } */ CharacterOther, | |
| 201 /* 126 - ~ */ CharacterTilde, | |
| 202 /* 127 - Delete */ CharacterOther, | |
| 203 }; | |
| 204 | |
| 205 // Utility functions for the CSS tokenizer. | |
| 206 | |
| 207 template <typename CharacterType> | |
| 208 static inline bool isCSSLetter(CharacterType character) | |
| 209 { | |
| 210 return character >= 128 || typesOfASCIICharacters[character] <= CharacterDas
h; | |
| 211 } | |
| 212 | |
// Returns true if |character| may follow a backslash in an escape sequence:
// anything from space upwards except Delete (127).
template <typename CharacterType>
static inline bool isCSSEscape(CharacterType character)
{
    if (character == 127)
        return false;
    return character >= ' ';
}
| 218 | |
// Returns true if |character| is allowed in an unquoted url(): '!', the
// range '#'..'&', or anything from '*' upwards except Delete (127).
template <typename CharacterType>
static inline bool isURILetter(CharacterType character)
{
    if (character >= '*')
        return character != 127;
    if (character >= '#')
        return character <= '&';
    return character == '!';
}
| 224 | |
// Returns true if the text at |currentCharacter| can begin an identifier once
// any leading dash has been skipped: an ASCII letter, '_', a non-ASCII code,
// or a backslash followed by a valid escape character.
template <typename CharacterType>
static inline bool isIdentifierStartAfterDash(CharacterType* currentCharacter)
{
    if (isASCIIAlpha(currentCharacter[0]))
        return true;
    if (currentCharacter[0] == '_' || currentCharacter[0] >= 128)
        return true;
    // The second character is only inspected after a backslash.
    return currentCharacter[0] == '\\' && isCSSEscape(currentCharacter[1]);
}
| 231 | |
| 232 template <typename CharacterType> | |
| 233 static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char*
constantString) | |
| 234 { | |
| 235 // Compare an character memory data with a zero terminated string. | |
| 236 do { | |
| 237 // The input must be part of an identifier if constantChar or constStrin
g | |
| 238 // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to
'-'. | |
| 239 ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantSt
ring == '-'); | |
| 240 ASSERT(*constantString != '-' || isCSSLetter(*cssString)); | |
| 241 if (toASCIILowerUnchecked(*cssString++) != (*constantString++)) | |
| 242 return false; | |
| 243 } while (*constantString); | |
| 244 return true; | |
| 245 } | |
| 246 | |
| 247 template <typename CharacterType> | |
| 248 static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, co
nst char* constantString) | |
| 249 { | |
| 250 ASSERT(*constantString); | |
| 251 | |
| 252 do { | |
| 253 if (*string++ != *constantString++) | |
| 254 return false; | |
| 255 } while (*constantString); | |
| 256 return true; | |
| 257 } | |
| 258 | |
| 259 template <typename CharacterType> | |
| 260 static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter) | |
| 261 { | |
| 262 // Returns with 0, if escape check is failed. Otherwise | |
| 263 // it returns with the following character. | |
| 264 ASSERT(*currentCharacter == '\\'); | |
| 265 | |
| 266 ++currentCharacter; | |
| 267 if (!isCSSEscape(*currentCharacter)) | |
| 268 return 0; | |
| 269 | |
| 270 if (isASCIIHexDigit(*currentCharacter)) { | |
| 271 int length = 6; | |
| 272 | |
| 273 do { | |
| 274 ++currentCharacter; | |
| 275 } while (isASCIIHexDigit(*currentCharacter) && --length); | |
| 276 | |
| 277 // Optional space after the escape sequence. | |
| 278 if (isHTMLSpace<CharacterType>(*currentCharacter)) | |
| 279 ++currentCharacter; | |
| 280 return currentCharacter; | |
| 281 } | |
| 282 return currentCharacter + 1; | |
| 283 } | |
| 284 | |
| 285 template <typename CharacterType> | |
| 286 static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter) | |
| 287 { | |
| 288 while (isHTMLSpace<CharacterType>(*currentCharacter)) | |
| 289 ++currentCharacter; | |
| 290 return currentCharacter; | |
| 291 } | |
| 292 | |
| 293 // Main CSS tokenizer functions. | |
| 294 | |
| 295 template <> | |
| 296 inline LChar*& CSSTokenizer::currentCharacter<LChar>() | |
| 297 { | |
| 298 return m_currentCharacter8; | |
| 299 } | |
| 300 | |
| 301 template <> | |
| 302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() | |
| 303 { | |
| 304 return m_currentCharacter16; | |
| 305 } | |
| 306 | |
| 307 UChar* CSSTokenizer::allocateStringBuffer16(size_t len) | |
| 308 { | |
| 309 // Allocates and returns a CSSTokenizer owned buffer for storing | |
| 310 // UTF-16 data. Used to get a suitable life span for UTF-16 | |
| 311 // strings, identifiers and URIs created by the tokenizer. | |
| 312 OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]); | |
| 313 | |
| 314 UChar* bufferPtr = buffer.get(); | |
| 315 | |
| 316 m_cssStrings16.append(buffer.release()); | |
| 317 return bufferPtr; | |
| 318 } | |
| 319 | |
| 320 template <> | |
| 321 inline LChar* CSSTokenizer::dataStart<LChar>() | |
| 322 { | |
| 323 return m_dataStart8.get(); | |
| 324 } | |
| 325 | |
| 326 template <> | |
| 327 inline UChar* CSSTokenizer::dataStart<UChar>() | |
| 328 { | |
| 329 return m_dataStart16.get(); | |
| 330 } | |
| 331 | |
| 332 template <typename CharacterType> | |
| 333 inline CSSParserLocation CSSTokenizer::tokenLocation() | |
| 334 { | |
| 335 CSSParserLocation location; | |
| 336 location.token.init(tokenStart<CharacterType>(), currentCharacter<CharacterT
ype>() - tokenStart<CharacterType>()); | |
| 337 location.lineNumber = m_tokenStartLineNumber; | |
| 338 location.offset = tokenStart<CharacterType>() - dataStart<CharacterType>(); | |
| 339 return location; | |
| 340 } | |
| 341 | |
| 342 CSSParserLocation CSSTokenizer::currentLocation() | |
| 343 { | |
| 344 if (is8BitSource()) | |
| 345 return tokenLocation<LChar>(); | |
| 346 return tokenLocation<UChar>(); | |
| 347 } | |
| 348 | |
| 349 template <typename CharacterType> | |
| 350 inline bool CSSTokenizer::isIdentifierStart() | |
| 351 { | |
| 352 // Check whether an identifier is started. | |
| 353 return isIdentifierStartAfterDash((*currentCharacter<CharacterType>() != '-'
) ? currentCharacter<CharacterType>() : currentCharacter<CharacterType>() + 1); | |
| 354 } | |
| 355 | |
// Selects how checkAndSkipString() reacts to invalid input.
enum CheckStringValidationMode {
    // Give up (return 0) on an unescaped line break or an invalid escape.
    AbortIfInvalid = 0,
    // Step over invalid input and keep scanning.
    SkipInvalid
};
| 360 | |
| 361 template <typename CharacterType> | |
| 362 static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter,
int quote, CheckStringValidationMode mode) | |
| 363 { | |
| 364 // If mode is AbortIfInvalid and the string check fails it returns | |
| 365 // with 0. Otherwise it returns with a pointer to the first | |
| 366 // character after the string. | |
| 367 while (true) { | |
| 368 if (UNLIKELY(*currentCharacter == quote)) { | |
| 369 // String parsing is successful. | |
| 370 return currentCharacter + 1; | |
| 371 } | |
| 372 if (UNLIKELY(!*currentCharacter)) { | |
| 373 // String parsing is successful up to end of input. | |
| 374 return currentCharacter; | |
| 375 } | |
| 376 if (mode == AbortIfInvalid && UNLIKELY(*currentCharacter <= '\r' && (*cu
rrentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) { | |
| 377 // String parsing is failed for character '\n', '\f' or '\r'. | |
| 378 return 0; | |
| 379 } | |
| 380 | |
| 381 if (LIKELY(currentCharacter[0] != '\\')) { | |
| 382 ++currentCharacter; | |
| 383 } else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') { | |
| 384 currentCharacter += 2; | |
| 385 } else if (currentCharacter[1] == '\r') { | |
| 386 currentCharacter += currentCharacter[2] == '\n' ? 3 : 2; | |
| 387 } else { | |
| 388 CharacterType* next = checkAndSkipEscape(currentCharacter); | |
| 389 if (!next) { | |
| 390 if (mode == AbortIfInvalid) | |
| 391 return 0; | |
| 392 next = currentCharacter + 1; | |
| 393 } | |
| 394 currentCharacter = next; | |
| 395 } | |
| 396 } | |
| 397 } | |
| 398 | |
| 399 template <typename CharacterType> | |
| 400 unsigned CSSTokenizer::parseEscape(CharacterType*& src) | |
| 401 { | |
| 402 ASSERT(*src == '\\' && isCSSEscape(src[1])); | |
| 403 | |
| 404 unsigned unicode = 0; | |
| 405 | |
| 406 ++src; | |
| 407 if (isASCIIHexDigit(*src)) { | |
| 408 | |
| 409 int length = 6; | |
| 410 | |
| 411 do { | |
| 412 unicode = (unicode << 4) + toASCIIHexValue(*src++); | |
| 413 } while (--length && isASCIIHexDigit(*src)); | |
| 414 | |
| 415 // Characters above 0x10ffff are not handled. | |
| 416 if (unicode > 0x10ffff) | |
| 417 unicode = 0xfffd; | |
| 418 | |
| 419 // Optional space after the escape sequence. | |
| 420 if (isHTMLSpace<CharacterType>(*src)) | |
| 421 ++src; | |
| 422 | |
| 423 return unicode; | |
| 424 } | |
| 425 | |
| 426 return *src++; | |
| 427 } | |
| 428 | |
| 429 template <> | |
| 430 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode
) | |
| 431 { | |
| 432 ASSERT(unicode <= 0xff); | |
| 433 *result = unicode; | |
| 434 | |
| 435 ++result; | |
| 436 } | |
| 437 | |
| 438 template <> | |
| 439 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode
) | |
| 440 { | |
| 441 // Replace unicode with a surrogate pairs when it is bigger than 0xffff | |
| 442 if (U16_LENGTH(unicode) == 2) { | |
| 443 *result++ = U16_LEAD(unicode); | |
| 444 *result = U16_TRAIL(unicode); | |
| 445 } else { | |
| 446 *result = unicode; | |
| 447 } | |
| 448 | |
| 449 ++result; | |
| 450 } | |
| 451 | |
| 452 template <typename SrcCharacterType> | |
| 453 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src) | |
| 454 { | |
| 455 // The decoded form of an identifier (after resolving escape | |
| 456 // sequences) will not contain more characters (ASCII or UTF-16 | |
| 457 // codepoints) than the input. This code can therefore ignore | |
| 458 // escape sequences completely. | |
| 459 SrcCharacterType* start = src; | |
| 460 do { | |
| 461 if (LIKELY(*src != '\\')) | |
| 462 src++; | |
| 463 else | |
| 464 parseEscape<SrcCharacterType>(src); | |
| 465 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1]))); | |
| 466 | |
| 467 return src - start; | |
| 468 } | |
| 469 | |
| 470 template <typename SrcCharacterType, typename DestCharacterType> | |
| 471 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCh
aracterType*& result, bool& hasEscape) | |
| 472 { | |
| 473 hasEscape = false; | |
| 474 do { | |
| 475 if (LIKELY(*src != '\\')) { | |
| 476 *result++ = *src++; | |
| 477 } else { | |
| 478 hasEscape = true; | |
| 479 SrcCharacterType* savedEscapeStart = src; | |
| 480 unsigned unicode = parseEscape<SrcCharacterType>(src); | |
| 481 if (unicode > 0xff && sizeof(DestCharacterType) == 1) { | |
| 482 src = savedEscapeStart; | |
| 483 return false; | |
| 484 } | |
| 485 UnicodeToChars(result, unicode); | |
| 486 } | |
| 487 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1]))); | |
| 488 | |
| 489 return true; | |
| 490 } | |
| 491 | |
| 492 template <typename CharacterType> | |
| 493 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin
g& resultString, bool& hasEscape) | |
| 494 { | |
| 495 // If a valid identifier start is found, we can safely | |
| 496 // parse the identifier until the next invalid character. | |
| 497 ASSERT(isIdentifierStart<CharacterType>()); | |
| 498 | |
| 499 CharacterType* start = currentCharacter<CharacterType>(); | |
| 500 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res
ult, hasEscape))) { | |
| 501 // Found an escape we couldn't handle with 8 bits, copy what has been re
cognized and continue | |
| 502 ASSERT(is8BitSource()); | |
| 503 UChar* result16 = allocateStringBuffer16((result - start) + peekMaxIdent
ifierLen(currentCharacter<CharacterType>())); | |
| 504 UChar* start16 = result16; | |
| 505 int i = 0; | |
| 506 for (; i < result - start; i++) | |
| 507 result16[i] = start[i]; | |
| 508 | |
| 509 result16 += i; | |
| 510 | |
| 511 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has
Escape); | |
| 512 | |
| 513 resultString.init(start16, result16 - start16); | |
| 514 | |
| 515 return; | |
| 516 } | |
| 517 | |
| 518 resultString.init(start, result - start); | |
| 519 } | |
| 520 | |
| 521 template <typename SrcCharacterType> | |
| 522 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote) | |
| 523 { | |
| 524 // The decoded form of a CSS string (after resolving escape | |
| 525 // sequences) will not contain more characters (ASCII or UTF-16 | |
| 526 // codepoints) than the input. This code can therefore ignore | |
| 527 // escape sequences completely and just return the length of the | |
| 528 // input string (possibly including terminating quote if any). | |
| 529 SrcCharacterType* end = checkAndSkipString(src, quote, SkipInvalid); | |
| 530 return end ? end - src : 0; | |
| 531 } | |
| 532 | |
| 533 template <typename SrcCharacterType, typename DestCharacterType> | |
| 534 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharac
terType*& result, UChar quote) | |
| 535 { | |
| 536 while (true) { | |
| 537 if (UNLIKELY(*src == quote)) { | |
| 538 // String parsing is done. | |
| 539 ++src; | |
| 540 return true; | |
| 541 } | |
| 542 if (UNLIKELY(!*src)) { | |
| 543 // String parsing is done, but don't advance pointer if at the end o
f input. | |
| 544 return true; | |
| 545 } | |
| 546 if (LIKELY(src[0] != '\\')) { | |
| 547 *result++ = *src++; | |
| 548 } else if (src[1] == '\n' || src[1] == '\f') { | |
| 549 src += 2; | |
| 550 } else if (src[1] == '\r') { | |
| 551 src += src[2] == '\n' ? 3 : 2; | |
| 552 } else { | |
| 553 SrcCharacterType* savedEscapeStart = src; | |
| 554 unsigned unicode = parseEscape<SrcCharacterType>(src); | |
| 555 if (unicode > 0xff && sizeof(DestCharacterType) == 1) { | |
| 556 src = savedEscapeStart; | |
| 557 return false; | |
| 558 } | |
| 559 UnicodeToChars(result, unicode); | |
| 560 } | |
| 561 } | |
| 562 | |
| 563 return true; | |
| 564 } | |
| 565 | |
| 566 template <typename CharacterType> | |
| 567 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r
esultString, UChar quote) | |
| 568 { | |
| 569 CharacterType* start = currentCharacter<CharacterType>(); | |
| 570 | |
| 571 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result,
quote))) { | |
| 572 // Found an escape we couldn't handle with 8 bits, copy what has been re
cognized and continue | |
| 573 ASSERT(is8BitSource()); | |
| 574 UChar* result16 = allocateStringBuffer16((result - start) + peekMaxStrin
gLen(currentCharacter<CharacterType>(), quote)); | |
| 575 UChar* start16 = result16; | |
| 576 int i = 0; | |
| 577 for (; i < result - start; i++) | |
| 578 result16[i] = start[i]; | |
| 579 | |
| 580 result16 += i; | |
| 581 | |
| 582 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); | |
| 583 | |
| 584 resultString.init(start16, result16 - start16); | |
| 585 return; | |
| 586 } | |
| 587 | |
| 588 resultString.init(start, result - start); | |
| 589 } | |
| 590 | |
| 591 template <typename CharacterType> | |
| 592 inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UC
har& quote) | |
| 593 { | |
| 594 start = skipWhiteSpace(currentCharacter<CharacterType>()); | |
| 595 | |
| 596 if (*start == '"' || *start == '\'') { | |
| 597 quote = *start++; | |
| 598 end = checkAndSkipString(start, quote, AbortIfInvalid); | |
| 599 if (!end) | |
| 600 return false; | |
| 601 } else { | |
| 602 quote = 0; | |
| 603 end = start; | |
| 604 while (isURILetter(*end)) { | |
| 605 if (LIKELY(*end != '\\')) { | |
| 606 ++end; | |
| 607 } else { | |
| 608 end = checkAndSkipEscape(end); | |
| 609 if (!end) | |
| 610 return false; | |
| 611 } | |
| 612 } | |
| 613 } | |
| 614 | |
| 615 end = skipWhiteSpace(end); | |
| 616 if (*end != ')') | |
| 617 return false; | |
| 618 | |
| 619 return true; | |
| 620 } | |
| 621 | |
| 622 template <typename SrcCharacterType> | |
| 623 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote) | |
| 624 { | |
| 625 // The decoded form of a URI (after resolving escape sequences) | |
| 626 // will not contain more characters (ASCII or UTF-16 codepoints) | |
| 627 // than the input. This code can therefore ignore escape sequences | |
| 628 // completely. | |
| 629 SrcCharacterType* start = src; | |
| 630 if (quote) { | |
| 631 ASSERT(quote == '"' || quote == '\''); | |
| 632 return peekMaxStringLen(src, quote); | |
| 633 } | |
| 634 | |
| 635 while (isURILetter(*src)) { | |
| 636 if (LIKELY(*src != '\\')) | |
| 637 src++; | |
| 638 else | |
| 639 parseEscape<SrcCharacterType>(src); | |
| 640 } | |
| 641 | |
| 642 return src - start; | |
| 643 } | |
| 644 | |
| 645 template <typename SrcCharacterType, typename DestCharacterType> | |
| 646 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter
Type*& dest, UChar quote) | |
| 647 { | |
| 648 if (quote) { | |
| 649 ASSERT(quote == '"' || quote == '\''); | |
| 650 return parseStringInternal(src, dest, quote); | |
| 651 } | |
| 652 | |
| 653 while (isURILetter(*src)) { | |
| 654 if (LIKELY(*src != '\\')) { | |
| 655 *dest++ = *src++; | |
| 656 } else { | |
| 657 unsigned unicode = parseEscape<SrcCharacterType>(src); | |
| 658 if (unicode > 0xff && sizeof(DestCharacterType) == 1) | |
| 659 return false; | |
| 660 UnicodeToChars(dest, unicode); | |
| 661 } | |
| 662 } | |
| 663 | |
| 664 return true; | |
| 665 } | |
| 666 | |
| 667 template <typename CharacterType> | |
| 668 inline void CSSTokenizer::parseURI(CSSParserString& string) | |
| 669 { | |
| 670 CharacterType* uriStart; | |
| 671 CharacterType* uriEnd; | |
| 672 UChar quote; | |
| 673 if (!findURI(uriStart, uriEnd, quote)) | |
| 674 return; | |
| 675 | |
| 676 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; | |
| 677 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))
) { | |
| 678 string.init(uriStart, dest - uriStart); | |
| 679 } else { | |
| 680 // An escape sequence was encountered that can't be stored in 8 bits. | |
| 681 // Reset the current character to the start of the URI and re-parse with | |
| 682 // a 16-bit destination. | |
| 683 ASSERT(is8BitSource()); | |
| 684 currentCharacter<CharacterType>() = uriStart; | |
| 685 UChar* result16 = allocateStringBuffer16(peekMaxURILen(currentCharacter<
CharacterType>(), quote)); | |
| 686 UChar* uriStart16 = result16; | |
| 687 bool result = parseURIInternal(currentCharacter<CharacterType>(), result
16, quote); | |
| 688 ASSERT_UNUSED(result, result); | |
| 689 string.init(uriStart16, result16 - uriStart16); | |
| 690 } | |
| 691 | |
| 692 currentCharacter<CharacterType>() = uriEnd + 1; | |
| 693 m_token = URI; | |
| 694 } | |
| 695 | |
| 696 template <typename CharacterType> | |
| 697 inline bool CSSTokenizer::parseUnicodeRange() | |
| 698 { | |
| 699 CharacterType* character = currentCharacter<CharacterType>() + 1; | |
| 700 int length = 6; | |
| 701 ASSERT(*currentCharacter<CharacterType>() == '+'); | |
| 702 | |
| 703 while (isASCIIHexDigit(*character) && length) { | |
| 704 ++character; | |
| 705 --length; | |
| 706 } | |
| 707 | |
| 708 if (length && *character == '?') { | |
| 709 // At most 5 hex digit followed by a question mark. | |
| 710 do { | |
| 711 ++character; | |
| 712 --length; | |
| 713 } while (*character == '?' && length); | |
| 714 currentCharacter<CharacterType>() = character; | |
| 715 return true; | |
| 716 } | |
| 717 | |
| 718 if (length < 6) { | |
| 719 // At least one hex digit. | |
| 720 if (character[0] == '-' && isASCIIHexDigit(character[1])) { | |
| 721 // Followed by a dash and a hex digit. | |
| 722 ++character; | |
| 723 length = 6; | |
| 724 do { | |
| 725 ++character; | |
| 726 } while (--length && isASCIIHexDigit(*character)); | |
| 727 } | |
| 728 currentCharacter<CharacterType>() = character; | |
| 729 return true; | |
| 730 } | |
| 731 return false; | |
| 732 } | |
| 733 | |
| 734 template <typename CharacterType> | |
| 735 bool CSSTokenizer::parseNthChild() | |
| 736 { | |
| 737 CharacterType* character = currentCharacter<CharacterType>(); | |
| 738 | |
| 739 while (isASCIIDigit(*character)) | |
| 740 ++character; | |
| 741 if (isASCIIAlphaCaselessEqual(*character, 'n')) { | |
| 742 currentCharacter<CharacterType>() = character + 1; | |
| 743 return true; | |
| 744 } | |
| 745 return false; | |
| 746 } | |
| 747 | |
| 748 template <typename CharacterType> | |
| 749 bool CSSTokenizer::parseNthChildExtra() | |
| 750 { | |
| 751 CharacterType* character = skipWhiteSpace(currentCharacter<CharacterType>())
; | |
| 752 if (*character != '+' && *character != '-') | |
| 753 return false; | |
| 754 | |
| 755 character = skipWhiteSpace(character + 1); | |
| 756 if (!isASCIIDigit(*character)) | |
| 757 return false; | |
| 758 | |
| 759 do { | |
| 760 ++character; | |
| 761 } while (isASCIIDigit(*character)); | |
| 762 | |
| 763 currentCharacter<CharacterType>() = character; | |
| 764 return true; | |
| 765 } | |
| 766 | |
// Classifies the function name that starts at the token start and is
// |length| characters long. Returns true if the name is one of the names
// listed below, false otherwise.
//
// For "not", "url", "cue", "calc", "host" and "host-context", m_token is set
// to the matching token. For the nth-* names, m_token is left alone and the
// tokenizer is switched to NthChildMode instead.
//
// NOTE(review): SWITCH/CASE are macros defined outside this file section;
// presumably they match |name| (of |length| characters) against each
// literal - confirm against their definition.
template <typename CharacterType>
inline bool CSSTokenizer::detectFunctionTypeToken(int length)
{
    ASSERT(length > 0);
    CharacterType* name = tokenStart<CharacterType>();
    SWITCH(name, length) {
        CASE("not") {
            m_token = NOTFUNCTION;
            return true;
        }
        CASE("url") {
            m_token = URI;
            return true;
        }
        CASE("cue") {
            m_token = CUEFUNCTION;
            return true;
        }
        CASE("calc") {
            m_token = CALCFUNCTION;
            return true;
        }
        CASE("host") {
            m_token = HOSTFUNCTION;
            return true;
        }
        CASE("host-context") {
            m_token = HOSTCONTEXTFUNCTION;
            return true;
        }
        // The nth-* functions only change the parsing mode.
        CASE("nth-child") {
            m_parsingMode = NthChildMode;
            return true;
        }
        CASE("nth-of-type") {
            m_parsingMode = NthChildMode;
            return true;
        }
        CASE("nth-last-child") {
            m_parsingMode = NthChildMode;
            return true;
        }
        CASE("nth-last-of-type") {
            m_parsingMode = NthChildMode;
            return true;
        }
    }
    return false;
}
| 816 | |
// Maps the media query keyword that starts at the token start and is
// |length| characters long to its token: "and", "not", "only" and "or" set
// m_token to MEDIA_AND, MEDIA_NOT, MEDIA_ONLY and MEDIA_OR respectively.
// Must only be called in MediaQueryMode.
//
// NOTE(review): SWITCH/CASE are macros defined outside this file section;
// presumably m_token is left unchanged when no literal matches - confirm
// against their definition.
template <typename CharacterType>
inline void CSSTokenizer::detectMediaQueryToken(int length)
{
    ASSERT(m_parsingMode == MediaQueryMode);
    CharacterType* name = tokenStart<CharacterType>();

    SWITCH(name, length) {
        CASE("and") {
            m_token = MEDIA_AND;
        }
        CASE("not") {
            m_token = MEDIA_NOT;
        }
        CASE("only") {
            m_token = MEDIA_ONLY;
        }
        CASE("or") {
            m_token = MEDIA_OR;
        }
    }
}
| 838 | |
// Maps the unit suffix at |type| (|length| characters, |length| > 0) to the
// matching dimension token by setting m_token. The recognized units are the
// literals listed below.
//
// NOTE(review): SWITCH/CASE are macros defined outside this file section;
// presumably m_token is left unchanged when no literal matches - confirm
// against their definition.
template <typename CharacterType>
inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length)
{
    ASSERT(length > 0);

    SWITCH(type, length) {
        CASE("cm") {
            m_token = CMS;
        }
        CASE("ch") {
            m_token = CHS;
        }
        CASE("deg") {
            m_token = DEGS;
        }
        CASE("dppx") {
            // There is a discussion about the name of this unit on www-style.
            // Keep this compile time guard in place until that is resolved.
            // http://lists.w3.org/Archives/Public/www-style/2012May/0915.html
            // NOTE(review): no compile time guard is visible around this
            // case any more; the sentence above looks stale.
            m_token = DPPX;
        }
        CASE("dpcm") {
            m_token = DPCM;
        }
        CASE("dpi") {
            m_token = DPI;
        }
        CASE("em") {
            m_token = EMS;
        }
        CASE("ex") {
            m_token = EXS;
        }
        CASE("fr") {
            m_token = FR;
        }
        CASE("grad") {
            m_token = GRADS;
        }
        CASE("hz") {
            m_token = HERTZ;
        }
        CASE("in") {
            m_token = INS;
        }
        CASE("khz") {
            m_token = KHERTZ;
        }
        CASE("mm") {
            m_token = MMS;
        }
        CASE("ms") {
            m_token = MSECS;
        }
        CASE("px") {
            m_token = PXS;
        }
        CASE("pt") {
            m_token = PTS;
        }
        CASE("pc") {
            m_token = PCS;
        }
        CASE("rad") {
            m_token = RADS;
        }
        CASE("rem") {
            m_token = REMS;
        }
        CASE("s") {
            m_token = SECS;
        }
        CASE("turn") {
            m_token = TURNS;
        }
        CASE("vw") {
            m_token = VW;
        }
        CASE("vh") {
            m_token = VH;
        }
        CASE("vmin") {
            m_token = VMIN;
        }
        CASE("vmax") {
            m_token = VMAX;
        }
        CASE("__qem") {
            m_token = QEMS;
        }
    }
}
| 931 | |
| 932 template <typename CharacterType> | |
| 933 inline void CSSTokenizer::detectDashToken(int length) | |
| 934 { | |
| 935 CharacterType* name = tokenStart<CharacterType>(); | |
| 936 | |
| 937 // Ignore leading dash. | |
| 938 ++name; | |
| 939 --length; | |
| 940 | |
| 941 SWITCH(name, length) { | |
| 942 CASE("webkit-any") { | |
| 943 m_token = ANYFUNCTION; | |
| 944 } | |
| 945 CASE("webkit-calc") { | |
| 946 m_token = CALCFUNCTION; | |
| 947 } | |
| 948 } | |
| 949 } | |
| 950 | |
| 951 template <typename CharacterType> | |
| 952 inline void CSSTokenizer::detectAtToken(int length, bool hasEscape) | |
| 953 { | |
| 954 CharacterType* name = tokenStart<CharacterType>(); | |
| 955 ASSERT(name[0] == '@' && length >= 2); | |
| 956 | |
| 957 // Ignore leading @. | |
| 958 ++name; | |
| 959 --length; | |
| 960 | |
| 961 // charset, font-face, import, media, namespace, page, supports, | |
| 962 // -webkit-keyframes, keyframes, and -webkit-mediaquery are not affected by
hasEscape. | |
| 963 SWITCH(name, length) { | |
| 964 CASE("bottom-left") { | |
| 965 if (LIKELY(!hasEscape)) | |
| 966 m_token = BOTTOMLEFT_SYM; | |
| 967 } | |
| 968 CASE("bottom-right") { | |
| 969 if (LIKELY(!hasEscape)) | |
| 970 m_token = BOTTOMRIGHT_SYM; | |
| 971 } | |
| 972 CASE("bottom-center") { | |
| 973 if (LIKELY(!hasEscape)) | |
| 974 m_token = BOTTOMCENTER_SYM; | |
| 975 } | |
| 976 CASE("bottom-left-corner") { | |
| 977 if (LIKELY(!hasEscape)) | |
| 978 m_token = BOTTOMLEFTCORNER_SYM; | |
| 979 } | |
| 980 CASE("bottom-right-corner") { | |
| 981 if (LIKELY(!hasEscape)) | |
| 982 m_token = BOTTOMRIGHTCORNER_SYM; | |
| 983 } | |
| 984 CASE("charset") { | |
| 985 if (name - 1 == dataStart<CharacterType>()) | |
| 986 m_token = CHARSET_SYM; | |
| 987 } | |
| 988 CASE("font-face") { | |
| 989 m_token = FONT_FACE_SYM; | |
| 990 } | |
| 991 CASE("import") { | |
| 992 m_parsingMode = MediaQueryMode; | |
| 993 m_token = IMPORT_SYM; | |
| 994 } | |
| 995 CASE("keyframes") { | |
| 996 if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled()) | |
| 997 m_token = KEYFRAMES_SYM; | |
| 998 } | |
| 999 CASE("left-top") { | |
| 1000 if (LIKELY(!hasEscape)) | |
| 1001 m_token = LEFTTOP_SYM; | |
| 1002 } | |
| 1003 CASE("left-middle") { | |
| 1004 if (LIKELY(!hasEscape)) | |
| 1005 m_token = LEFTMIDDLE_SYM; | |
| 1006 } | |
| 1007 CASE("left-bottom") { | |
| 1008 if (LIKELY(!hasEscape)) | |
| 1009 m_token = LEFTBOTTOM_SYM; | |
| 1010 } | |
| 1011 CASE("media") { | |
| 1012 m_parsingMode = MediaQueryMode; | |
| 1013 m_token = MEDIA_SYM; | |
| 1014 } | |
| 1015 CASE("namespace") { | |
| 1016 m_token = NAMESPACE_SYM; | |
| 1017 } | |
| 1018 CASE("page") { | |
| 1019 m_token = PAGE_SYM; | |
| 1020 } | |
| 1021 CASE("right-top") { | |
| 1022 if (LIKELY(!hasEscape)) | |
| 1023 m_token = RIGHTTOP_SYM; | |
| 1024 } | |
| 1025 CASE("right-middle") { | |
| 1026 if (LIKELY(!hasEscape)) | |
| 1027 m_token = RIGHTMIDDLE_SYM; | |
| 1028 } | |
| 1029 CASE("right-bottom") { | |
| 1030 if (LIKELY(!hasEscape)) | |
| 1031 m_token = RIGHTBOTTOM_SYM; | |
| 1032 } | |
| 1033 CASE("supports") { | |
| 1034 m_parsingMode = SupportsMode; | |
| 1035 m_token = SUPPORTS_SYM; | |
| 1036 } | |
| 1037 CASE("top-left") { | |
| 1038 if (LIKELY(!hasEscape)) | |
| 1039 m_token = TOPLEFT_SYM; | |
| 1040 } | |
| 1041 CASE("top-right") { | |
| 1042 if (LIKELY(!hasEscape)) | |
| 1043 m_token = TOPRIGHT_SYM; | |
| 1044 } | |
| 1045 CASE("top-center") { | |
| 1046 if (LIKELY(!hasEscape)) | |
| 1047 m_token = TOPCENTER_SYM; | |
| 1048 } | |
| 1049 CASE("top-left-corner") { | |
| 1050 if (LIKELY(!hasEscape)) | |
| 1051 m_token = TOPLEFTCORNER_SYM; | |
| 1052 } | |
| 1053 CASE("top-right-corner") { | |
| 1054 if (LIKELY(!hasEscape)) | |
| 1055 m_token = TOPRIGHTCORNER_SYM; | |
| 1056 } | |
| 1057 CASE("viewport") { | |
| 1058 m_token = VIEWPORT_RULE_SYM; | |
| 1059 } | |
| 1060 CASE("-internal-rule") { | |
| 1061 if (LIKELY(!hasEscape && m_internal)) | |
| 1062 m_token = INTERNAL_RULE_SYM; | |
| 1063 } | |
| 1064 CASE("-internal-decls") { | |
| 1065 if (LIKELY(!hasEscape && m_internal)) | |
| 1066 m_token = INTERNAL_DECLS_SYM; | |
| 1067 } | |
| 1068 CASE("-internal-value") { | |
| 1069 if (LIKELY(!hasEscape && m_internal)) | |
| 1070 m_token = INTERNAL_VALUE_SYM; | |
| 1071 } | |
| 1072 CASE("-webkit-keyframes") { | |
| 1073 m_token = WEBKIT_KEYFRAMES_SYM; | |
| 1074 } | |
| 1075 CASE("-internal-selector") { | |
| 1076 if (LIKELY(!hasEscape && m_internal)) | |
| 1077 m_token = INTERNAL_SELECTOR_SYM; | |
| 1078 } | |
| 1079 CASE("-internal-medialist") { | |
| 1080 if (!m_internal) | |
| 1081 return; | |
| 1082 m_parsingMode = MediaQueryMode; | |
| 1083 m_token = INTERNAL_MEDIALIST_SYM; | |
| 1084 } | |
| 1085 CASE("-internal-keyframe-rule") { | |
| 1086 if (LIKELY(!hasEscape && m_internal)) | |
| 1087 m_token = INTERNAL_KEYFRAME_RULE_SYM; | |
| 1088 } | |
| 1089 CASE("-internal-keyframe-key-list") { | |
| 1090 if (!m_internal) | |
| 1091 return; | |
| 1092 m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM; | |
| 1093 } | |
| 1094 CASE("-internal-supports-condition") { | |
| 1095 if (!m_internal) | |
| 1096 return; | |
| 1097 m_parsingMode = SupportsMode; | |
| 1098 m_token = INTERNAL_SUPPORTS_CONDITION_SYM; | |
| 1099 } | |
| 1100 } | |
| 1101 } | |
| 1102 | |
| 1103 template <typename CharacterType> | |
| 1104 inline void CSSTokenizer::detectSupportsToken(int length) | |
| 1105 { | |
| 1106 ASSERT(m_parsingMode == SupportsMode); | |
| 1107 CharacterType* name = tokenStart<CharacterType>(); | |
| 1108 | |
| 1109 SWITCH(name, length) { | |
| 1110 CASE("or") { | |
| 1111 m_token = SUPPORTS_OR; | |
| 1112 } | |
| 1113 CASE("and") { | |
| 1114 m_token = SUPPORTS_AND; | |
| 1115 } | |
| 1116 CASE("not") { | |
| 1117 m_token = SUPPORTS_NOT; | |
| 1118 } | |
| 1119 } | |
| 1120 } | |
| 1121 | |
// realLex scans exactly one token starting at
// currentCharacter<SrcCharacterType>(), stores the token code in m_token and
// returns it. Tokens that carry text fill yylval->string, numeric tokens fill
// yylval->number. The first character is classified through
// typesOfASCIICharacters (every code above 127 is handled as an identifier
// start) and the switch below dispatches on that class. Comments produce no
// token: after skipping one, control jumps back to restartAfterComment.
// yylvalWithoutType is cast to YYSTYPE* on entry.
// NOTE(review): the untyped parameter presumably keeps YYSTYPE out of the
// header -- confirm against CSSTokenizer.h.
| 1122 template <typename SrcCharacterType> | |
| 1123 int CSSTokenizer::realLex(void* yylvalWithoutType) | |
| 1124 { | |
| 1125 YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType); | |
| 1126 // Write pointer for the next character. | |
| 1127 SrcCharacterType* result; | |
| 1128 CSSParserString resultString; | |
| 1129 bool hasEscape; | |
| 1130 | |
| 1131 // The input buffer is terminated by a \0 character, so | |
| 1132 // it is safe to read one character ahead of a known non-null. | |
| 1133 #if ENABLE(ASSERT) | |
| 1134 // In debug we check with an ASSERT that the length is > 0 for string types. | |
| 1135 yylval->string.clear(); | |
| 1136 #endif | |
| 1137 | |
// Scanning (re)starts here. The token start and its line number are recorded,
// the first character is read into m_token and the cursor is moved past it.
| 1138 restartAfterComment: | |
| 1139 result = currentCharacter<SrcCharacterType>(); | |
| 1140 setTokenStart(result); | |
| 1141 m_tokenStartLineNumber = m_lineNumber; | |
| 1142 m_token = *currentCharacter<SrcCharacterType>(); | |
| 1143 ++currentCharacter<SrcCharacterType>(); | |
| 1144 | |
| 1145 switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdenti
fierStart) { | |
// A character of class CharacterCaselessU followed by a plus sign may begin a
// unicode range. If parseUnicodeRange rejects it, the text is scanned as an
// ordinary identifier by the next case.
| 1146 case CharacterCaselessU: | |
| 1147 if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) { | |
| 1148 if (parseUnicodeRange<SrcCharacterType>()) { | |
| 1149 m_token = UNICODERANGE; | |
| 1150 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara
cter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
| 1151 break; | |
| 1152 } | |
| 1153 } | |
| 1154 // Fall through to CharacterIdentifierStart. | |
| 1155 | |
| 1156 case CharacterIdentifierStart: | |
| 1157 --currentCharacter<SrcCharacterType>(); | |
| 1158 parseIdentifier(result, yylval->string, hasEscape); | |
| 1159 m_token = IDENT; | |
| 1160 | |
// An identifier directly followed by an opening parenthesis is a function
// token. In SupportsMode the supports keywords are tried first; when one
// matches, the parenthesis is not consumed here.
| 1161 if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) { | |
| 1162 if (m_parsingMode == SupportsMode && !hasEscape) { | |
| 1163 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCha
racterType>()); | |
| 1164 if (m_token != IDENT) | |
| 1165 break; | |
| 1166 } | |
| 1167 | |
| 1168 m_token = FUNCTION; | |
| 1169 if (!hasEscape) | |
| 1170 detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<Sr
cCharacterType>()); | |
| 1171 | |
| 1172 // Skip parenthesis | |
| 1173 ++currentCharacter<SrcCharacterType>(); | |
| 1174 ++result; | |
| 1175 ++yylval->string.m_length; | |
| 1176 | |
// m_token is reset to FUNCTION before parseURI runs.
// NOTE(review): presumably parseURI sets m_token back to URI when the
// argument really is a URI -- confirm against parseURI.
| 1177 if (m_token == URI) { | |
| 1178 m_token = FUNCTION; | |
| 1179 // Check whether it is really an URI. | |
| 1180 if (yylval->string.is8Bit()) | |
| 1181 parseURI<LChar>(yylval->string); | |
| 1182 else | |
| 1183 parseURI<UChar>(yylval->string); | |
| 1184 } | |
| 1185 } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) { | |
| 1186 if (m_parsingMode == MediaQueryMode) { | |
| 1187 detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcC
haracterType>()); | |
| 1188 } else if (m_parsingMode == SupportsMode) { | |
| 1189 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCha
racterType>()); | |
| 1190 } else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqua
l(tokenStart<SrcCharacterType>()[0], 'n')) { | |
| 1191 if (result - tokenStart<SrcCharacterType>() == 1) { | |
| 1192 // String "n" is IDENT but "n+1" is NTH. | |
| 1193 if (parseNthChildExtra<SrcCharacterType>()) { | |
| 1194 m_token = NTH; | |
| 1195 yylval->string.m_length = currentCharacter<SrcCharacterT
ype>() - tokenStart<SrcCharacterType>(); | |
| 1196 } | |
| 1197 } else if (result - tokenStart<SrcCharacterType>() >= 2 && token
Start<SrcCharacterType>()[1] == '-') { | |
| 1198 // String "n-" is IDENT but "n-1" is NTH. | |
| 1199 // Set currentCharacter to '-' to continue parsing. | |
| 1200 SrcCharacterType* nextCharacter = result; | |
| 1201 currentCharacter<SrcCharacterType>() = tokenStart<SrcCharact
erType>() + 1; | |
| 1202 if (parseNthChildExtra<SrcCharacterType>()) { | |
| 1203 m_token = NTH; | |
| 1204 yylval->string.setLength(currentCharacter<SrcCharacterTy
pe>() - tokenStart<SrcCharacterType>()); | |
| 1205 } else { | |
| 1206 // Revert the change to currentCharacter if unsuccessful
. | |
| 1207 currentCharacter<SrcCharacterType>() = nextCharacter; | |
| 1208 } | |
| 1209 } | |
| 1210 } | |
| 1211 } | |
| 1212 break; | |
| 1213 | |
// A dot that is not followed by a digit stays a single-character token.
| 1214 case CharacterDot: | |
| 1215 if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) | |
| 1216 break; | |
| 1217 // Fall through to CharacterNumber. | |
| 1218 | |
| 1219 case CharacterNumber: { | |
// dotSeen is already true when this case was entered through CharacterDot.
// It later selects FLOATTOKEN over INTEGER for a number without a unit.
| 1220 bool dotSeen = (m_token == '.'); | |
| 1221 | |
| 1222 while (true) { | |
| 1223 if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) { | |
| 1224 // Only one dot is allowed for a number, | |
| 1225 // and it must be followed by a digit. | |
| 1226 if (currentCharacter<SrcCharacterType>()[0] != '.' || dotSeen ||
!isASCIIDigit(currentCharacter<SrcCharacterType>()[1])) | |
| 1227 break; | |
| 1228 dotSeen = true; | |
| 1229 } | |
| 1230 ++currentCharacter<SrcCharacterType>(); | |
| 1231 } | |
| 1232 | |
| 1233 if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaC
aselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) { | |
| 1234 // "[0-9]+n" is always an NthChild. | |
| 1235 ++currentCharacter<SrcCharacterType>(); | |
| 1236 parseNthChildExtra<SrcCharacterType>(); | |
| 1237 m_token = NTH; | |
| 1238 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter
<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
| 1239 break; | |
| 1240 } | |
| 1241 | |
| 1242 // Use SVG parser for numbers on SVG presentation attributes. | |
| 1243 if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) { | |
| 1244 // We need to take care of units like 'em' or 'ex'. | |
| 1245 SrcCharacterType* character = currentCharacter<SrcCharacterType>(); | |
| 1246 if (isASCIIAlphaCaselessEqual(*character, 'e')) { | |
| 1247 ASSERT(character - tokenStart<SrcCharacterType>() > 0); | |
| 1248 ++character; | |
| 1249 if (*character == '-' || *character == '+' || isASCIIDigit(*char
acter)) { | |
| 1250 ++character; | |
| 1251 while (isASCIIDigit(*character)) | |
| 1252 ++character; | |
| 1253 // Use FLOATTOKEN if the string contains exponents. | |
| 1254 dotSeen = true; | |
| 1255 currentCharacter<SrcCharacterType>() = character; | |
| 1256 } | |
| 1257 } | |
// If parseSVGNumber fails, the break below leaves the switch with m_token
// still holding the first character of the token.
| 1258 if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - toke
nStart<SrcCharacterType>(), yylval->number)) | |
| 1259 break; | |
| 1260 } else { | |
| 1261 yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(),
currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
| 1262 } | |
| 1263 | |
| 1264 // Type of the function. | |
| 1265 if (isIdentifierStart<SrcCharacterType>()) { | |
| 1266 SrcCharacterType* type = currentCharacter<SrcCharacterType>(); | |
| 1267 result = currentCharacter<SrcCharacterType>(); | |
| 1268 | |
| 1269 parseIdentifier(result, resultString, hasEscape); | |
| 1270 | |
// DIMEN is the fallback; detectNumberToken replaces it when the suffix is a
// known unit and the suffix was written without escapes.
| 1271 m_token = DIMEN; | |
| 1272 if (!hasEscape) | |
| 1273 detectNumberToken(type, currentCharacter<SrcCharacterType>() - t
ype); | |
| 1274 | |
| 1275 if (m_token == DIMEN) { | |
| 1276 // The decoded number is overwritten, but this is intentional. | |
| 1277 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara
cter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
| 1278 } | |
| 1279 } else if (*currentCharacter<SrcCharacterType>() == '%') { | |
| 1280 // Although the CSS grammar says {num}% we follow | |
| 1281 // webkit at the moment which uses {num}%+. | |
| 1282 do { | |
| 1283 ++currentCharacter<SrcCharacterType>(); | |
| 1284 } while (*currentCharacter<SrcCharacterType>() == '%'); | |
| 1285 m_token = PERCENTAGE; | |
| 1286 } else { | |
| 1287 m_token = dotSeen ? FLOATTOKEN : INTEGER; | |
| 1288 } | |
| 1289 break; | |
| 1290 } | |
| 1291 | |
// A dash can begin an identifier or function name, the two characters that
// complete the SGML_CD token, or (in NthChildMode only) a negative nth
// expression. Otherwise it stays a single-character token.
| 1292 case CharacterDash: | |
| 1293 if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) { | |
| 1294 --currentCharacter<SrcCharacterType>(); | |
| 1295 parseIdentifier(result, resultString, hasEscape); | |
| 1296 m_token = IDENT; | |
| 1297 | |
| 1298 if (*currentCharacter<SrcCharacterType>() == '(') { | |
| 1299 m_token = FUNCTION; | |
| 1300 if (!hasEscape) | |
| 1301 detectDashToken<SrcCharacterType>(result - tokenStart<SrcCha
racterType>()); | |
| 1302 ++currentCharacter<SrcCharacterType>(); | |
| 1303 ++result; | |
| 1304 } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape &&
isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) { | |
| 1305 if (result - tokenStart<SrcCharacterType>() == 2) { | |
| 1306 // String "-n" is IDENT but "-n+1" is NTH. | |
| 1307 if (parseNthChildExtra<SrcCharacterType>()) { | |
| 1308 m_token = NTH; | |
| 1309 result = currentCharacter<SrcCharacterType>(); | |
| 1310 } | |
| 1311 } else if (result - tokenStart<SrcCharacterType>() >= 3 && token
Start<SrcCharacterType>()[2] == '-') { | |
| 1312 // String "-n-" is IDENT but "-n-1" is NTH. | |
| 1313 // Set currentCharacter to second '-' of '-n-' to continue p
arsing. | |
| 1314 SrcCharacterType* nextCharacter = result; | |
| 1315 currentCharacter<SrcCharacterType>() = tokenStart<SrcCharact
erType>() + 2; | |
| 1316 if (parseNthChildExtra<SrcCharacterType>()) { | |
| 1317 m_token = NTH; | |
| 1318 result = currentCharacter<SrcCharacterType>(); | |
| 1319 } else { | |
| 1320 // Revert the change to currentCharacter if unsuccessful
. | |
| 1321 currentCharacter<SrcCharacterType>() = nextCharacter; | |
| 1322 } | |
| 1323 } | |
| 1324 } | |
// The reported string length is derived from result, which the branches
// above moved past the parenthesis or to the end of the nth expression.
| 1325 resultString.setLength(result - tokenStart<SrcCharacterType>()); | |
| 1326 yylval->string = resultString; | |
| 1327 } else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentChar
acter<SrcCharacterType>()[1] == '>') { | |
| 1328 currentCharacter<SrcCharacterType>() += 2; | |
| 1329 m_token = SGML_CD; | |
| 1330 } else if (UNLIKELY(m_parsingMode == NthChildMode)) { | |
| 1331 // "-[0-9]+n" is always an NthChild. | |
| 1332 if (parseNthChild<SrcCharacterType>()) { | |
| 1333 parseNthChildExtra<SrcCharacterType>(); | |
| 1334 m_token = NTH; | |
| 1335 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara
cter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
| 1336 } | |
| 1337 } | |
| 1338 break; | |
| 1339 | |
| 1340 case CharacterOther: | |
| 1341 // m_token is simply the current character. | |
| 1342 break; | |
| 1343 | |
| 1344 case CharacterNull: | |
| 1345 // Do not advance pointer at the end of input. | |
| 1346 --currentCharacter<SrcCharacterType>(); | |
| 1347 break; | |
| 1348 | |
// A whole run of whitespace becomes one WHITESPACE token; m_lineNumber is
// advanced for every newline inside the run.
| 1349 case CharacterWhiteSpace: | |
| 1350 m_token = WHITESPACE; | |
| 1351 // Might start with a '\n'. | |
| 1352 --currentCharacter<SrcCharacterType>(); | |
| 1353 do { | |
| 1354 if (*currentCharacter<SrcCharacterType>() == '\n') | |
| 1355 ++m_lineNumber; | |
| 1356 ++currentCharacter<SrcCharacterType>(); | |
| 1357 } while (*currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICh
aracters[*currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace)); | |
| 1358 break; | |
| 1359 | |
// The two cases below return the tokenizer to NormalMode; the token itself
// stays the single character that was read.
| 1360 case CharacterEndMediaQueryOrSupports: | |
| 1361 if (m_parsingMode == MediaQueryMode || m_parsingMode == SupportsMode) | |
| 1362 m_parsingMode = NormalMode; | |
| 1363 break; | |
| 1364 | |
| 1365 case CharacterEndNthChild: | |
| 1366 if (m_parsingMode == NthChildMode) | |
| 1367 m_parsingMode = NormalMode; | |
| 1368 break; | |
| 1369 | |
// m_token still holds the quote character here and is passed to the string
// helpers. If checkAndSkipString fails, the quote stays a one-character token.
| 1370 case CharacterQuote: | |
| 1371 if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token, Ab
ortIfInvalid)) { | |
| 1372 ++result; | |
| 1373 parseString<SrcCharacterType>(result, yylval->string, m_token); | |
| 1374 m_token = STRING; | |
| 1375 } | |
| 1376 break; | |
| 1377 | |
| 1378 case CharacterExclamationMark: { | |
| 1379 SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterTy
pe>()); | |
// The constant 9 below is the number of characters in the identifier
// compared against on the next line.
| 1380 if (isEqualToCSSIdentifier(start, "important")) { | |
| 1381 m_token = IMPORTANT_SYM; | |
| 1382 currentCharacter<SrcCharacterType>() = start + 9; | |
| 1383 } | |
| 1384 break; | |
| 1385 } | |
| 1386 | |
// A hash mark followed by a digit is scanned as HEX. Followed by an
// identifier it is IDSEL, unless the identifier has no escapes and consists
// of hex digits only, in which case it is reported as HEX.
| 1387 case CharacterHashmark: { | |
| 1388 SrcCharacterType* start = currentCharacter<SrcCharacterType>(); | |
| 1389 result = currentCharacter<SrcCharacterType>(); | |
| 1390 | |
| 1391 if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) { | |
| 1392 // This must be a valid hex number token. | |
| 1393 do { | |
| 1394 ++currentCharacter<SrcCharacterType>(); | |
| 1395 } while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>())); | |
| 1396 m_token = HEX; | |
| 1397 yylval->string.init(start, currentCharacter<SrcCharacterType>() - st
art); | |
| 1398 } else if (isIdentifierStart<SrcCharacterType>()) { | |
| 1399 m_token = IDSEL; | |
| 1400 parseIdentifier(result, yylval->string, hasEscape); | |
| 1401 if (!hasEscape) { | |
| 1402 // Check whether the identifier is also a valid hex number. | |
| 1403 SrcCharacterType* current = start; | |
| 1404 m_token = HEX; | |
| 1405 do { | |
| 1406 if (!isASCIIHexDigit(*current)) { | |
| 1407 m_token = IDSEL; | |
| 1408 break; | |
| 1409 } | |
| 1410 ++current; | |
| 1411 } while (current < result); | |
| 1412 } | |
| 1413 } | |
| 1414 break; | |
| 1415 } | |
| 1416 | |
| 1417 case CharacterSlash: | |
| 1418 // Ignore comments. They are not even considered as white spaces. | |
| 1419 if (*currentCharacter<SrcCharacterType>() == '*') { | |
| 1420 const CSSParserLocation startLocation = currentLocation(); | |
| 1421 if (m_parser.m_observer) { | |
| 1422 unsigned startOffset = currentCharacter<SrcCharacterType>() - da
taStart<SrcCharacterType>() - 1; // Start with a slash. | |
| 1423 m_parser.m_observer->startComment(startOffset - m_parsedTextPref
ixLength); | |
| 1424 } | |
| 1425 ++currentCharacter<SrcCharacterType>(); | |
| 1426 while (currentCharacter<SrcCharacterType>()[0] != '*' || currentChar
acter<SrcCharacterType>()[1] != '/') { | |
| 1427 if (*currentCharacter<SrcCharacterType>() == '\n') | |
| 1428 ++m_lineNumber; | |
| 1429 if (*currentCharacter<SrcCharacterType>() == '\0') { | |
| 1430 // Unterminated comments are simply ignored. | |
// Stepping back by two cancels the unconditional advance by two after the
// loop, so the cursor ends up on the terminating null character.
| 1431 currentCharacter<SrcCharacterType>() -= 2; | |
| 1432 m_parser.reportError(startLocation, UnterminatedCommentCSSEr
ror); | |
| 1433 break; | |
| 1434 } | |
| 1435 ++currentCharacter<SrcCharacterType>(); | |
| 1436 } | |
| 1437 currentCharacter<SrcCharacterType>() += 2; | |
| 1438 if (m_parser.m_observer) { | |
| 1439 unsigned endOffset = currentCharacter<SrcCharacterType>() - data
Start<SrcCharacterType>(); | |
| 1440 unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1
- m_parsedTextSuffixLength); | |
| 1441 m_parser.m_observer->endComment(std::min(endOffset, userTextEndO
ffset) - m_parsedTextPrefixLength); | |
| 1442 } | |
| 1443 goto restartAfterComment; | |
| 1444 } | |
| 1445 break; | |
| 1446 | |
// The attribute-match operators in this switch (this case and the
// Asterisk, Xor, VerticalBar and Tilde cases) share one shape: when the next
// character is an equals sign it is consumed and m_token becomes the
// two-character operator token.
| 1447 case CharacterDollar: | |
| 1448 if (*currentCharacter<SrcCharacterType>() == '=') { | |
| 1449 ++currentCharacter<SrcCharacterType>(); | |
| 1450 m_token = ENDSWITH; | |
| 1451 } | |
| 1452 break; | |
| 1453 | |
| 1454 case CharacterAsterisk: | |
| 1455 if (*currentCharacter<SrcCharacterType>() == '=') { | |
| 1456 ++currentCharacter<SrcCharacterType>(); | |
| 1457 m_token = CONTAINS; | |
| 1458 } | |
| 1459 break; | |
| 1460 | |
| 1461 case CharacterPlus: | |
| 1462 if (UNLIKELY(m_parsingMode == NthChildMode)) { | |
| 1463 // Simplest case. "+[0-9]*n" is always NthChild. | |
| 1464 if (parseNthChild<SrcCharacterType>()) { | |
| 1465 parseNthChildExtra<SrcCharacterType>(); | |
| 1466 m_token = NTH; | |
| 1467 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara
cter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); | |
| 1468 } | |
| 1469 } | |
| 1470 break; | |
| 1471 | |
// The SGML comment opener is reported with the same SGML_CD token that the
// dash case uses for the closer.
| 1472 case CharacterLess: | |
| 1473 if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<S
rcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-')
{ | |
| 1474 currentCharacter<SrcCharacterType>() += 3; | |
| 1475 m_token = SGML_CD; | |
| 1476 } | |
| 1477 break; | |
| 1478 | |
// ATKEYWORD is the fallback; detectAtToken replaces it for known rule names.
| 1479 case CharacterAt: | |
| 1480 if (isIdentifierStart<SrcCharacterType>()) { | |
| 1481 m_token = ATKEYWORD; | |
| 1482 ++result; | |
| 1483 parseIdentifier(result, resultString, hasEscape); | |
| 1484 // The standard enables unicode escapes in at-rules. In this case on
ly the resultString will contain the | |
| 1485 // correct identifier, hence we have to use it to determine its leng
th instead of the usual pointer arithmetic. | |
| 1486 detectAtToken<SrcCharacterType>(resultString.length() + 1, hasEscape
); | |
| 1487 } | |
| 1488 break; | |
| 1489 | |
// A backslash that starts a valid escape begins an identifier.
| 1490 case CharacterBackSlash: | |
| 1491 if (isCSSEscape(*currentCharacter<SrcCharacterType>())) { | |
| 1492 --currentCharacter<SrcCharacterType>(); | |
| 1493 parseIdentifier(result, yylval->string, hasEscape); | |
| 1494 m_token = IDENT; | |
| 1495 } | |
| 1496 break; | |
| 1497 | |
| 1498 case CharacterXor: | |
| 1499 if (*currentCharacter<SrcCharacterType>() == '=') { | |
| 1500 ++currentCharacter<SrcCharacterType>(); | |
| 1501 m_token = BEGINSWITH; | |
| 1502 } | |
| 1503 break; | |
| 1504 | |
| 1505 case CharacterVerticalBar: | |
| 1506 if (*currentCharacter<SrcCharacterType>() == '=') { | |
| 1507 ++currentCharacter<SrcCharacterType>(); | |
| 1508 m_token = DASHMATCH; | |
| 1509 } | |
| 1510 break; | |
| 1511 | |
| 1512 case CharacterTilde: | |
| 1513 if (*currentCharacter<SrcCharacterType>() == '=') { | |
| 1514 ++currentCharacter<SrcCharacterType>(); | |
| 1515 m_token = INCLUDES; | |
| 1516 } | |
| 1517 break; | |
| 1518 | |
| 1519 default: | |
| 1520 ASSERT_NOT_REACHED(); | |
| 1521 break; | |
| 1522 } | |
| 1523 | |
| 1524 return m_token; | |
| 1525 } | |
| 1526 | |
| 1527 template <> | |
| 1528 inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart) | |
| 1529 { | |
| 1530 m_tokenStart.ptr8 = tokenStart; | |
| 1531 } | |
| 1532 | |
| 1533 template <> | |
| 1534 inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart) | |
| 1535 { | |
| 1536 m_tokenStart.ptr16 = tokenStart; | |
| 1537 } | |
| 1538 | |
| 1539 void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, con
st String& string, const char* suffix, unsigned suffixLength) | |
| 1540 { | |
| 1541 m_parsedTextPrefixLength = prefixLength; | |
| 1542 m_parsedTextSuffixLength = suffixLength; | |
| 1543 unsigned stringLength = string.length(); | |
| 1544 unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuff
ixLength + 1; | |
| 1545 m_length = length; | |
| 1546 | |
| 1547 if (!stringLength || string.is8Bit()) { | |
| 1548 m_dataStart8 = adoptArrayPtr(new LChar[length]); | |
| 1549 for (unsigned i = 0; i < m_parsedTextPrefixLength; i++) | |
| 1550 m_dataStart8[i] = prefix[i]; | |
| 1551 | |
| 1552 if (stringLength) | |
| 1553 memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.charact
ers8(), stringLength * sizeof(LChar)); | |
| 1554 | |
| 1555 unsigned start = m_parsedTextPrefixLength + stringLength; | |
| 1556 unsigned end = start + suffixLength; | |
| 1557 for (unsigned i = start; i < end; i++) | |
| 1558 m_dataStart8[i] = suffix[i - start]; | |
| 1559 | |
| 1560 m_dataStart8[length - 1] = 0; | |
| 1561 | |
| 1562 m_is8BitSource = true; | |
| 1563 m_currentCharacter8 = m_dataStart8.get(); | |
| 1564 m_currentCharacter16 = 0; | |
| 1565 setTokenStart<LChar>(m_currentCharacter8); | |
| 1566 m_lexFunc = &CSSTokenizer::realLex<LChar>; | |
| 1567 return; | |
| 1568 } | |
| 1569 | |
| 1570 m_dataStart16 = adoptArrayPtr(new UChar[length]); | |
| 1571 for (unsigned i = 0; i < m_parsedTextPrefixLength; i++) | |
| 1572 m_dataStart16[i] = prefix[i]; | |
| 1573 | |
| 1574 ASSERT(stringLength); | |
| 1575 memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16()
, stringLength * sizeof(UChar)); | |
| 1576 | |
| 1577 unsigned start = m_parsedTextPrefixLength + stringLength; | |
| 1578 unsigned end = start + suffixLength; | |
| 1579 for (unsigned i = start; i < end; i++) | |
| 1580 m_dataStart16[i] = suffix[i - start]; | |
| 1581 | |
| 1582 m_dataStart16[length - 1] = 0; | |
| 1583 | |
| 1584 m_is8BitSource = false; | |
| 1585 m_currentCharacter8 = 0; | |
| 1586 m_currentCharacter16 = m_dataStart16.get(); | |
| 1587 setTokenStart<UChar>(m_currentCharacter16); | |
| 1588 m_lexFunc = &CSSTokenizer::realLex<UChar>; | |
| 1589 } | |
| 1590 | |
| 1591 } // namespace blink | |
| OLD | NEW |