| Index: Source/core/css/CSSTokenizer-in.cpp
|
| diff --git a/Source/core/css/CSSTokenizer-in.cpp b/Source/core/css/CSSTokenizer-in.cpp
|
| deleted file mode 100644
|
| index 523942ce5ef069354bd520c4e77add46ec89fd50..0000000000000000000000000000000000000000
|
| --- a/Source/core/css/CSSTokenizer-in.cpp
|
| +++ /dev/null
|
| @@ -1,1591 +0,0 @@
|
| -/*
|
| - * Copyright (C) 2003 Lars Knoll (knoll@kde.org)
|
| - * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com)
|
| - * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved.
|
| - * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com>
|
| - * Copyright (C) 2008 Eric Seidel <eric@webkit.org>
|
| - * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
|
| - * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved.
|
| - * Copyright (C) 2012 Intel Corporation. All rights reserved.
|
| - *
|
| - * This library is free software; you can redistribute it and/or
|
| - * modify it under the terms of the GNU Library General Public
|
| - * License as published by the Free Software Foundation; either
|
| - * version 2 of the License, or (at your option) any later version.
|
| - *
|
| - * This library is distributed in the hope that it will be useful,
|
| - * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| - * Library General Public License for more details.
|
| - *
|
| - * You should have received a copy of the GNU Library General Public License
|
| - * along with this library; see the file COPYING.LIB. If not, write to
|
| - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
| - * Boston, MA 02110-1301, USA.
|
| - */
|
| -
|
| -#include "config.h"
|
| -#include "core/css/CSSTokenizer.h"
|
| -
|
| -#include "core/css/CSSKeyframeRule.h"
|
| -#include "core/css/parser/BisonCSSParser.h"
|
| -#include "core/css/CSSParserValues.h"
|
| -#include "core/css/MediaQuery.h"
|
| -#include "core/css/StyleRule.h"
|
| -#include "core/html/parser/HTMLParserIdioms.h"
|
| -#include "core/svg/SVGParserUtilities.h"
|
| -
|
| -namespace blink {
|
| -
|
| -#include "core/CSSGrammar.h"
|
| -
|
| -enum CharacterType {
|
| - // Types for the main switch.
|
| -
|
| - // The first 4 types must be grouped together, as they
|
| - // represent the allowed chars in an identifier.
|
| - CharacterCaselessU,
|
| - CharacterIdentifierStart,
|
| - CharacterNumber,
|
| - CharacterDash,
|
| -
|
| - CharacterOther,
|
| - CharacterNull,
|
| - CharacterWhiteSpace,
|
| - CharacterEndMediaQueryOrSupports,
|
| - CharacterEndNthChild,
|
| - CharacterQuote,
|
| - CharacterExclamationMark,
|
| - CharacterHashmark,
|
| - CharacterDollar,
|
| - CharacterAsterisk,
|
| - CharacterPlus,
|
| - CharacterDot,
|
| - CharacterSlash,
|
| - CharacterLess,
|
| - CharacterAt,
|
| - CharacterBackSlash,
|
| - CharacterXor,
|
| - CharacterVerticalBar,
|
| - CharacterTilde,
|
| -};
|
| -
|
| -// 128 ASCII codes
|
| -static const CharacterType typesOfASCIICharacters[128] = {
|
| -/* 0 - Null */ CharacterNull,
|
| -/* 1 - Start of Heading */ CharacterOther,
|
| -/* 2 - Start of Text */ CharacterOther,
|
| -/* 3 - End of Text */ CharacterOther,
|
| -/* 4 - End of Transm. */ CharacterOther,
|
| -/* 5 - Enquiry */ CharacterOther,
|
| -/* 6 - Acknowledgment */ CharacterOther,
|
| -/* 7 - Bell */ CharacterOther,
|
| -/* 8 - Back Space */ CharacterOther,
|
| -/* 9 - Horizontal Tab */ CharacterWhiteSpace,
|
| -/* 10 - Line Feed */ CharacterWhiteSpace,
|
| -/* 11 - Vertical Tab */ CharacterOther,
|
| -/* 12 - Form Feed */ CharacterWhiteSpace,
|
| -/* 13 - Carriage Return */ CharacterWhiteSpace,
|
| -/* 14 - Shift Out */ CharacterOther,
|
| -/* 15 - Shift In */ CharacterOther,
|
| -/* 16 - Data Line Escape */ CharacterOther,
|
| -/* 17 - Device Control 1 */ CharacterOther,
|
| -/* 18 - Device Control 2 */ CharacterOther,
|
| -/* 19 - Device Control 3 */ CharacterOther,
|
| -/* 20 - Device Control 4 */ CharacterOther,
|
| -/* 21 - Negative Ack. */ CharacterOther,
|
| -/* 22 - Synchronous Idle */ CharacterOther,
|
| -/* 23 - End of Transmit */ CharacterOther,
|
| -/* 24 - Cancel */ CharacterOther,
|
| -/* 25 - End of Medium */ CharacterOther,
|
| -/* 26 - Substitute */ CharacterOther,
|
| -/* 27 - Escape */ CharacterOther,
|
| -/* 28 - File Separator */ CharacterOther,
|
| -/* 29 - Group Separator */ CharacterOther,
|
| -/* 30 - Record Separator */ CharacterOther,
|
| -/* 31 - Unit Separator */ CharacterOther,
|
| -/* 32 - Space */ CharacterWhiteSpace,
|
| -/* 33 - ! */ CharacterExclamationMark,
|
| -/* 34 - " */ CharacterQuote,
|
| -/* 35 - # */ CharacterHashmark,
|
| -/* 36 - $ */ CharacterDollar,
|
| -/* 37 - % */ CharacterOther,
|
| -/* 38 - & */ CharacterOther,
|
| -/* 39 - ' */ CharacterQuote,
|
| -/* 40 - ( */ CharacterOther,
|
| -/* 41 - ) */ CharacterEndNthChild,
|
| -/* 42 - * */ CharacterAsterisk,
|
| -/* 43 - + */ CharacterPlus,
|
| -/* 44 - , */ CharacterOther,
|
| -/* 45 - - */ CharacterDash,
|
| -/* 46 - . */ CharacterDot,
|
| -/* 47 - / */ CharacterSlash,
|
| -/* 48 - 0 */ CharacterNumber,
|
| -/* 49 - 1 */ CharacterNumber,
|
| -/* 50 - 2 */ CharacterNumber,
|
| -/* 51 - 3 */ CharacterNumber,
|
| -/* 52 - 4 */ CharacterNumber,
|
| -/* 53 - 5 */ CharacterNumber,
|
| -/* 54 - 6 */ CharacterNumber,
|
| -/* 55 - 7 */ CharacterNumber,
|
| -/* 56 - 8 */ CharacterNumber,
|
| -/* 57 - 9 */ CharacterNumber,
|
| -/* 58 - : */ CharacterOther,
|
| -/* 59 - ; */ CharacterEndMediaQueryOrSupports,
|
| -/* 60 - < */ CharacterLess,
|
| -/* 61 - = */ CharacterOther,
|
| -/* 62 - > */ CharacterOther,
|
| -/* 63 - ? */ CharacterOther,
|
| -/* 64 - @ */ CharacterAt,
|
| -/* 65 - A */ CharacterIdentifierStart,
|
| -/* 66 - B */ CharacterIdentifierStart,
|
| -/* 67 - C */ CharacterIdentifierStart,
|
| -/* 68 - D */ CharacterIdentifierStart,
|
| -/* 69 - E */ CharacterIdentifierStart,
|
| -/* 70 - F */ CharacterIdentifierStart,
|
| -/* 71 - G */ CharacterIdentifierStart,
|
| -/* 72 - H */ CharacterIdentifierStart,
|
| -/* 73 - I */ CharacterIdentifierStart,
|
| -/* 74 - J */ CharacterIdentifierStart,
|
| -/* 75 - K */ CharacterIdentifierStart,
|
| -/* 76 - L */ CharacterIdentifierStart,
|
| -/* 77 - M */ CharacterIdentifierStart,
|
| -/* 78 - N */ CharacterIdentifierStart,
|
| -/* 79 - O */ CharacterIdentifierStart,
|
| -/* 80 - P */ CharacterIdentifierStart,
|
| -/* 81 - Q */ CharacterIdentifierStart,
|
| -/* 82 - R */ CharacterIdentifierStart,
|
| -/* 83 - S */ CharacterIdentifierStart,
|
| -/* 84 - T */ CharacterIdentifierStart,
|
| -/* 85 - U */ CharacterCaselessU,
|
| -/* 86 - V */ CharacterIdentifierStart,
|
| -/* 87 - W */ CharacterIdentifierStart,
|
| -/* 88 - X */ CharacterIdentifierStart,
|
| -/* 89 - Y */ CharacterIdentifierStart,
|
| -/* 90 - Z */ CharacterIdentifierStart,
|
| -/* 91 - [ */ CharacterOther,
|
| -/* 92 - \ */ CharacterBackSlash,
|
| -/* 93 - ] */ CharacterOther,
|
| -/* 94 - ^ */ CharacterXor,
|
| -/* 95 - _ */ CharacterIdentifierStart,
|
| -/* 96 - ` */ CharacterOther,
|
| -/* 97 - a */ CharacterIdentifierStart,
|
| -/* 98 - b */ CharacterIdentifierStart,
|
| -/* 99 - c */ CharacterIdentifierStart,
|
| -/* 100 - d */ CharacterIdentifierStart,
|
| -/* 101 - e */ CharacterIdentifierStart,
|
| -/* 102 - f */ CharacterIdentifierStart,
|
| -/* 103 - g */ CharacterIdentifierStart,
|
| -/* 104 - h */ CharacterIdentifierStart,
|
| -/* 105 - i */ CharacterIdentifierStart,
|
| -/* 106 - j */ CharacterIdentifierStart,
|
| -/* 107 - k */ CharacterIdentifierStart,
|
| -/* 108 - l */ CharacterIdentifierStart,
|
| -/* 109 - m */ CharacterIdentifierStart,
|
| -/* 110 - n */ CharacterIdentifierStart,
|
| -/* 111 - o */ CharacterIdentifierStart,
|
| -/* 112 - p */ CharacterIdentifierStart,
|
| -/* 113 - q */ CharacterIdentifierStart,
|
| -/* 114 - r */ CharacterIdentifierStart,
|
| -/* 115 - s */ CharacterIdentifierStart,
|
| -/* 116 - t */ CharacterIdentifierStart,
|
| -/* 117 - u */ CharacterCaselessU,
|
| -/* 118 - v */ CharacterIdentifierStart,
|
| -/* 119 - w */ CharacterIdentifierStart,
|
| -/* 120 - x */ CharacterIdentifierStart,
|
| -/* 121 - y */ CharacterIdentifierStart,
|
| -/* 122 - z */ CharacterIdentifierStart,
|
| -/* 123 - { */ CharacterEndMediaQueryOrSupports,
|
| -/* 124 - | */ CharacterVerticalBar,
|
| -/* 125 - } */ CharacterOther,
|
| -/* 126 - ~ */ CharacterTilde,
|
| -/* 127 - Delete */ CharacterOther,
|
| -};
|
| -
|
| -// Utility functions for the CSS tokenizer.
|
| -
|
| -template <typename CharacterType>
|
| -static inline bool isCSSLetter(CharacterType character)
|
| -{
|
| - return character >= 128 || typesOfASCIICharacters[character] <= CharacterDash;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -static inline bool isCSSEscape(CharacterType character)
|
| -{
|
| - return character >= ' ' && character != 127;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -static inline bool isURILetter(CharacterType character)
|
| -{
|
| - return (character >= '*' && character != 127) || (character >= '#' && character <= '&') || character == '!';
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -static inline bool isIdentifierStartAfterDash(CharacterType* currentCharacter)
|
| -{
|
| - return isASCIIAlpha(currentCharacter[0]) || currentCharacter[0] == '_' || currentCharacter[0] >= 128
|
| - || (currentCharacter[0] == '\\' && isCSSEscape(currentCharacter[1]));
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char* constantString)
|
| -{
|
| - // Compare an character memory data with a zero terminated string.
|
| - do {
|
| - // The input must be part of an identifier if constantChar or constString
|
| - // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to '-'.
|
| - ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantString == '-');
|
| - ASSERT(*constantString != '-' || isCSSLetter(*cssString));
|
| - if (toASCIILowerUnchecked(*cssString++) != (*constantString++))
|
| - return false;
|
| - } while (*constantString);
|
| - return true;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, const char* constantString)
|
| -{
|
| - ASSERT(*constantString);
|
| -
|
| - do {
|
| - if (*string++ != *constantString++)
|
| - return false;
|
| - } while (*constantString);
|
| - return true;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter)
|
| -{
|
| - // Returns with 0, if escape check is failed. Otherwise
|
| - // it returns with the following character.
|
| - ASSERT(*currentCharacter == '\\');
|
| -
|
| - ++currentCharacter;
|
| - if (!isCSSEscape(*currentCharacter))
|
| - return 0;
|
| -
|
| - if (isASCIIHexDigit(*currentCharacter)) {
|
| - int length = 6;
|
| -
|
| - do {
|
| - ++currentCharacter;
|
| - } while (isASCIIHexDigit(*currentCharacter) && --length);
|
| -
|
| - // Optional space after the escape sequence.
|
| - if (isHTMLSpace<CharacterType>(*currentCharacter))
|
| - ++currentCharacter;
|
| - return currentCharacter;
|
| - }
|
| - return currentCharacter + 1;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter)
|
| -{
|
| - while (isHTMLSpace<CharacterType>(*currentCharacter))
|
| - ++currentCharacter;
|
| - return currentCharacter;
|
| -}
|
| -
|
| -// Main CSS tokenizer functions.
|
| -
|
| -template <>
|
| -inline LChar*& CSSTokenizer::currentCharacter<LChar>()
|
| -{
|
| - return m_currentCharacter8;
|
| -}
|
| -
|
| -template <>
|
| -inline UChar*& CSSTokenizer::currentCharacter<UChar>()
|
| -{
|
| - return m_currentCharacter16;
|
| -}
|
| -
|
| -UChar* CSSTokenizer::allocateStringBuffer16(size_t len)
|
| -{
|
| - // Allocates and returns a CSSTokenizer owned buffer for storing
|
| - // UTF-16 data. Used to get a suitable life span for UTF-16
|
| - // strings, identifiers and URIs created by the tokenizer.
|
| - OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]);
|
| -
|
| - UChar* bufferPtr = buffer.get();
|
| -
|
| - m_cssStrings16.append(buffer.release());
|
| - return bufferPtr;
|
| -}
|
| -
|
| -template <>
|
| -inline LChar* CSSTokenizer::dataStart<LChar>()
|
| -{
|
| - return m_dataStart8.get();
|
| -}
|
| -
|
| -template <>
|
| -inline UChar* CSSTokenizer::dataStart<UChar>()
|
| -{
|
| - return m_dataStart16.get();
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline CSSParserLocation CSSTokenizer::tokenLocation()
|
| -{
|
| - CSSParserLocation location;
|
| - location.token.init(tokenStart<CharacterType>(), currentCharacter<CharacterType>() - tokenStart<CharacterType>());
|
| - location.lineNumber = m_tokenStartLineNumber;
|
| - location.offset = tokenStart<CharacterType>() - dataStart<CharacterType>();
|
| - return location;
|
| -}
|
| -
|
| -CSSParserLocation CSSTokenizer::currentLocation()
|
| -{
|
| - if (is8BitSource())
|
| - return tokenLocation<LChar>();
|
| - return tokenLocation<UChar>();
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline bool CSSTokenizer::isIdentifierStart()
|
| -{
|
| - // Check whether an identifier is started.
|
| - return isIdentifierStartAfterDash((*currentCharacter<CharacterType>() != '-') ? currentCharacter<CharacterType>() : currentCharacter<CharacterType>() + 1);
|
| -}
|
| -
|
| -enum CheckStringValidationMode {
|
| - AbortIfInvalid,
|
| - SkipInvalid
|
| -};
|
| -
|
| -template <typename CharacterType>
|
| -static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter, int quote, CheckStringValidationMode mode)
|
| -{
|
| - // If mode is AbortIfInvalid and the string check fails it returns
|
| - // with 0. Otherwise it returns with a pointer to the first
|
| - // character after the string.
|
| - while (true) {
|
| - if (UNLIKELY(*currentCharacter == quote)) {
|
| - // String parsing is successful.
|
| - return currentCharacter + 1;
|
| - }
|
| - if (UNLIKELY(!*currentCharacter)) {
|
| - // String parsing is successful up to end of input.
|
| - return currentCharacter;
|
| - }
|
| - if (mode == AbortIfInvalid && UNLIKELY(*currentCharacter <= '\r' && (*currentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) {
|
| - // String parsing is failed for character '\n', '\f' or '\r'.
|
| - return 0;
|
| - }
|
| -
|
| - if (LIKELY(currentCharacter[0] != '\\')) {
|
| - ++currentCharacter;
|
| - } else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') {
|
| - currentCharacter += 2;
|
| - } else if (currentCharacter[1] == '\r') {
|
| - currentCharacter += currentCharacter[2] == '\n' ? 3 : 2;
|
| - } else {
|
| - CharacterType* next = checkAndSkipEscape(currentCharacter);
|
| - if (!next) {
|
| - if (mode == AbortIfInvalid)
|
| - return 0;
|
| - next = currentCharacter + 1;
|
| - }
|
| - currentCharacter = next;
|
| - }
|
| - }
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -unsigned CSSTokenizer::parseEscape(CharacterType*& src)
|
| -{
|
| - ASSERT(*src == '\\' && isCSSEscape(src[1]));
|
| -
|
| - unsigned unicode = 0;
|
| -
|
| - ++src;
|
| - if (isASCIIHexDigit(*src)) {
|
| -
|
| - int length = 6;
|
| -
|
| - do {
|
| - unicode = (unicode << 4) + toASCIIHexValue(*src++);
|
| - } while (--length && isASCIIHexDigit(*src));
|
| -
|
| - // Characters above 0x10ffff are not handled.
|
| - if (unicode > 0x10ffff)
|
| - unicode = 0xfffd;
|
| -
|
| - // Optional space after the escape sequence.
|
| - if (isHTMLSpace<CharacterType>(*src))
|
| - ++src;
|
| -
|
| - return unicode;
|
| - }
|
| -
|
| - return *src++;
|
| -}
|
| -
|
| -template <>
|
| -inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode)
|
| -{
|
| - ASSERT(unicode <= 0xff);
|
| - *result = unicode;
|
| -
|
| - ++result;
|
| -}
|
| -
|
| -template <>
|
| -inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode)
|
| -{
|
| - // Replace unicode with a surrogate pairs when it is bigger than 0xffff
|
| - if (U16_LENGTH(unicode) == 2) {
|
| - *result++ = U16_LEAD(unicode);
|
| - *result = U16_TRAIL(unicode);
|
| - } else {
|
| - *result = unicode;
|
| - }
|
| -
|
| - ++result;
|
| -}
|
| -
|
| -template <typename SrcCharacterType>
|
| -size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src)
|
| -{
|
| - // The decoded form of an identifier (after resolving escape
|
| - // sequences) will not contain more characters (ASCII or UTF-16
|
| - // codepoints) than the input. This code can therefore ignore
|
| - // escape sequences completely.
|
| - SrcCharacterType* start = src;
|
| - do {
|
| - if (LIKELY(*src != '\\'))
|
| - src++;
|
| - else
|
| - parseEscape<SrcCharacterType>(src);
|
| - } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
|
| -
|
| - return src - start;
|
| -}
|
| -
|
| -template <typename SrcCharacterType, typename DestCharacterType>
|
| -inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape)
|
| -{
|
| - hasEscape = false;
|
| - do {
|
| - if (LIKELY(*src != '\\')) {
|
| - *result++ = *src++;
|
| - } else {
|
| - hasEscape = true;
|
| - SrcCharacterType* savedEscapeStart = src;
|
| - unsigned unicode = parseEscape<SrcCharacterType>(src);
|
| - if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
|
| - src = savedEscapeStart;
|
| - return false;
|
| - }
|
| - UnicodeToChars(result, unicode);
|
| - }
|
| - } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
|
| -
|
| - return true;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape)
|
| -{
|
| - // If a valid identifier start is found, we can safely
|
| - // parse the identifier until the next invalid character.
|
| - ASSERT(isIdentifierStart<CharacterType>());
|
| -
|
| - CharacterType* start = currentCharacter<CharacterType>();
|
| - if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) {
|
| - // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
|
| - ASSERT(is8BitSource());
|
| - UChar* result16 = allocateStringBuffer16((result - start) + peekMaxIdentifierLen(currentCharacter<CharacterType>()));
|
| - UChar* start16 = result16;
|
| - int i = 0;
|
| - for (; i < result - start; i++)
|
| - result16[i] = start[i];
|
| -
|
| - result16 += i;
|
| -
|
| - parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape);
|
| -
|
| - resultString.init(start16, result16 - start16);
|
| -
|
| - return;
|
| - }
|
| -
|
| - resultString.init(start, result - start);
|
| -}
|
| -
|
| -template <typename SrcCharacterType>
|
| -size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote)
|
| -{
|
| - // The decoded form of a CSS string (after resolving escape
|
| - // sequences) will not contain more characters (ASCII or UTF-16
|
| - // codepoints) than the input. This code can therefore ignore
|
| - // escape sequences completely and just return the length of the
|
| - // input string (possibly including terminating quote if any).
|
| - SrcCharacterType* end = checkAndSkipString(src, quote, SkipInvalid);
|
| - return end ? end - src : 0;
|
| -}
|
| -
|
| -template <typename SrcCharacterType, typename DestCharacterType>
|
| -inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote)
|
| -{
|
| - while (true) {
|
| - if (UNLIKELY(*src == quote)) {
|
| - // String parsing is done.
|
| - ++src;
|
| - return true;
|
| - }
|
| - if (UNLIKELY(!*src)) {
|
| - // String parsing is done, but don't advance pointer if at the end of input.
|
| - return true;
|
| - }
|
| - if (LIKELY(src[0] != '\\')) {
|
| - *result++ = *src++;
|
| - } else if (src[1] == '\n' || src[1] == '\f') {
|
| - src += 2;
|
| - } else if (src[1] == '\r') {
|
| - src += src[2] == '\n' ? 3 : 2;
|
| - } else {
|
| - SrcCharacterType* savedEscapeStart = src;
|
| - unsigned unicode = parseEscape<SrcCharacterType>(src);
|
| - if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
|
| - src = savedEscapeStart;
|
| - return false;
|
| - }
|
| - UnicodeToChars(result, unicode);
|
| - }
|
| - }
|
| -
|
| - return true;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote)
|
| -{
|
| - CharacterType* start = currentCharacter<CharacterType>();
|
| -
|
| - if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {
|
| - // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
|
| - ASSERT(is8BitSource());
|
| - UChar* result16 = allocateStringBuffer16((result - start) + peekMaxStringLen(currentCharacter<CharacterType>(), quote));
|
| - UChar* start16 = result16;
|
| - int i = 0;
|
| - for (; i < result - start; i++)
|
| - result16[i] = start[i];
|
| -
|
| - result16 += i;
|
| -
|
| - parseStringInternal(currentCharacter<CharacterType>(), result16, quote);
|
| -
|
| - resultString.init(start16, result16 - start16);
|
| - return;
|
| - }
|
| -
|
| - resultString.init(start, result - start);
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UChar& quote)
|
| -{
|
| - start = skipWhiteSpace(currentCharacter<CharacterType>());
|
| -
|
| - if (*start == '"' || *start == '\'') {
|
| - quote = *start++;
|
| - end = checkAndSkipString(start, quote, AbortIfInvalid);
|
| - if (!end)
|
| - return false;
|
| - } else {
|
| - quote = 0;
|
| - end = start;
|
| - while (isURILetter(*end)) {
|
| - if (LIKELY(*end != '\\')) {
|
| - ++end;
|
| - } else {
|
| - end = checkAndSkipEscape(end);
|
| - if (!end)
|
| - return false;
|
| - }
|
| - }
|
| - }
|
| -
|
| - end = skipWhiteSpace(end);
|
| - if (*end != ')')
|
| - return false;
|
| -
|
| - return true;
|
| -}
|
| -
|
| -template <typename SrcCharacterType>
|
| -inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote)
|
| -{
|
| - // The decoded form of a URI (after resolving escape sequences)
|
| - // will not contain more characters (ASCII or UTF-16 codepoints)
|
| - // than the input. This code can therefore ignore escape sequences
|
| - // completely.
|
| - SrcCharacterType* start = src;
|
| - if (quote) {
|
| - ASSERT(quote == '"' || quote == '\'');
|
| - return peekMaxStringLen(src, quote);
|
| - }
|
| -
|
| - while (isURILetter(*src)) {
|
| - if (LIKELY(*src != '\\'))
|
| - src++;
|
| - else
|
| - parseEscape<SrcCharacterType>(src);
|
| - }
|
| -
|
| - return src - start;
|
| -}
|
| -
|
| -template <typename SrcCharacterType, typename DestCharacterType>
|
| -inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote)
|
| -{
|
| - if (quote) {
|
| - ASSERT(quote == '"' || quote == '\'');
|
| - return parseStringInternal(src, dest, quote);
|
| - }
|
| -
|
| - while (isURILetter(*src)) {
|
| - if (LIKELY(*src != '\\')) {
|
| - *dest++ = *src++;
|
| - } else {
|
| - unsigned unicode = parseEscape<SrcCharacterType>(src);
|
| - if (unicode > 0xff && sizeof(DestCharacterType) == 1)
|
| - return false;
|
| - UnicodeToChars(dest, unicode);
|
| - }
|
| - }
|
| -
|
| - return true;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline void CSSTokenizer::parseURI(CSSParserString& string)
|
| -{
|
| - CharacterType* uriStart;
|
| - CharacterType* uriEnd;
|
| - UChar quote;
|
| - if (!findURI(uriStart, uriEnd, quote))
|
| - return;
|
| -
|
| - CharacterType* dest = currentCharacter<CharacterType>() = uriStart;
|
| - if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) {
|
| - string.init(uriStart, dest - uriStart);
|
| - } else {
|
| - // An escape sequence was encountered that can't be stored in 8 bits.
|
| - // Reset the current character to the start of the URI and re-parse with
|
| - // a 16-bit destination.
|
| - ASSERT(is8BitSource());
|
| - currentCharacter<CharacterType>() = uriStart;
|
| - UChar* result16 = allocateStringBuffer16(peekMaxURILen(currentCharacter<CharacterType>(), quote));
|
| - UChar* uriStart16 = result16;
|
| - bool result = parseURIInternal(currentCharacter<CharacterType>(), result16, quote);
|
| - ASSERT_UNUSED(result, result);
|
| - string.init(uriStart16, result16 - uriStart16);
|
| - }
|
| -
|
| - currentCharacter<CharacterType>() = uriEnd + 1;
|
| - m_token = URI;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline bool CSSTokenizer::parseUnicodeRange()
|
| -{
|
| - CharacterType* character = currentCharacter<CharacterType>() + 1;
|
| - int length = 6;
|
| - ASSERT(*currentCharacter<CharacterType>() == '+');
|
| -
|
| - while (isASCIIHexDigit(*character) && length) {
|
| - ++character;
|
| - --length;
|
| - }
|
| -
|
| - if (length && *character == '?') {
|
| - // At most 5 hex digit followed by a question mark.
|
| - do {
|
| - ++character;
|
| - --length;
|
| - } while (*character == '?' && length);
|
| - currentCharacter<CharacterType>() = character;
|
| - return true;
|
| - }
|
| -
|
| - if (length < 6) {
|
| - // At least one hex digit.
|
| - if (character[0] == '-' && isASCIIHexDigit(character[1])) {
|
| - // Followed by a dash and a hex digit.
|
| - ++character;
|
| - length = 6;
|
| - do {
|
| - ++character;
|
| - } while (--length && isASCIIHexDigit(*character));
|
| - }
|
| - currentCharacter<CharacterType>() = character;
|
| - return true;
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -bool CSSTokenizer::parseNthChild()
|
| -{
|
| - CharacterType* character = currentCharacter<CharacterType>();
|
| -
|
| - while (isASCIIDigit(*character))
|
| - ++character;
|
| - if (isASCIIAlphaCaselessEqual(*character, 'n')) {
|
| - currentCharacter<CharacterType>() = character + 1;
|
| - return true;
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -bool CSSTokenizer::parseNthChildExtra()
|
| -{
|
| - CharacterType* character = skipWhiteSpace(currentCharacter<CharacterType>());
|
| - if (*character != '+' && *character != '-')
|
| - return false;
|
| -
|
| - character = skipWhiteSpace(character + 1);
|
| - if (!isASCIIDigit(*character))
|
| - return false;
|
| -
|
| - do {
|
| - ++character;
|
| - } while (isASCIIDigit(*character));
|
| -
|
| - currentCharacter<CharacterType>() = character;
|
| - return true;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline bool CSSTokenizer::detectFunctionTypeToken(int length)
|
| -{
|
| - ASSERT(length > 0);
|
| - CharacterType* name = tokenStart<CharacterType>();
|
| - SWITCH(name, length) {
|
| - CASE("not") {
|
| - m_token = NOTFUNCTION;
|
| - return true;
|
| - }
|
| - CASE("url") {
|
| - m_token = URI;
|
| - return true;
|
| - }
|
| - CASE("cue") {
|
| - m_token = CUEFUNCTION;
|
| - return true;
|
| - }
|
| - CASE("calc") {
|
| - m_token = CALCFUNCTION;
|
| - return true;
|
| - }
|
| - CASE("host") {
|
| - m_token = HOSTFUNCTION;
|
| - return true;
|
| - }
|
| - CASE("host-context") {
|
| - m_token = HOSTCONTEXTFUNCTION;
|
| - return true;
|
| - }
|
| - CASE("nth-child") {
|
| - m_parsingMode = NthChildMode;
|
| - return true;
|
| - }
|
| - CASE("nth-of-type") {
|
| - m_parsingMode = NthChildMode;
|
| - return true;
|
| - }
|
| - CASE("nth-last-child") {
|
| - m_parsingMode = NthChildMode;
|
| - return true;
|
| - }
|
| - CASE("nth-last-of-type") {
|
| - m_parsingMode = NthChildMode;
|
| - return true;
|
| - }
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline void CSSTokenizer::detectMediaQueryToken(int length)
|
| -{
|
| - ASSERT(m_parsingMode == MediaQueryMode);
|
| - CharacterType* name = tokenStart<CharacterType>();
|
| -
|
| - SWITCH(name, length) {
|
| - CASE("and") {
|
| - m_token = MEDIA_AND;
|
| - }
|
| - CASE("not") {
|
| - m_token = MEDIA_NOT;
|
| - }
|
| - CASE("only") {
|
| - m_token = MEDIA_ONLY;
|
| - }
|
| - CASE("or") {
|
| - m_token = MEDIA_OR;
|
| - }
|
| - }
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length)
|
| -{
|
| - ASSERT(length > 0);
|
| -
|
| - SWITCH(type, length) {
|
| - CASE("cm") {
|
| - m_token = CMS;
|
| - }
|
| - CASE("ch") {
|
| - m_token = CHS;
|
| - }
|
| - CASE("deg") {
|
| - m_token = DEGS;
|
| - }
|
| - CASE("dppx") {
|
| - // There is a discussion about the name of this unit on www-style.
|
| - // Keep this compile time guard in place until that is resolved.
|
| - // http://lists.w3.org/Archives/Public/www-style/2012May/0915.html
|
| - m_token = DPPX;
|
| - }
|
| - CASE("dpcm") {
|
| - m_token = DPCM;
|
| - }
|
| - CASE("dpi") {
|
| - m_token = DPI;
|
| - }
|
| - CASE("em") {
|
| - m_token = EMS;
|
| - }
|
| - CASE("ex") {
|
| - m_token = EXS;
|
| - }
|
| - CASE("fr") {
|
| - m_token = FR;
|
| - }
|
| - CASE("grad") {
|
| - m_token = GRADS;
|
| - }
|
| - CASE("hz") {
|
| - m_token = HERTZ;
|
| - }
|
| - CASE("in") {
|
| - m_token = INS;
|
| - }
|
| - CASE("khz") {
|
| - m_token = KHERTZ;
|
| - }
|
| - CASE("mm") {
|
| - m_token = MMS;
|
| - }
|
| - CASE("ms") {
|
| - m_token = MSECS;
|
| - }
|
| - CASE("px") {
|
| - m_token = PXS;
|
| - }
|
| - CASE("pt") {
|
| - m_token = PTS;
|
| - }
|
| - CASE("pc") {
|
| - m_token = PCS;
|
| - }
|
| - CASE("rad") {
|
| - m_token = RADS;
|
| - }
|
| - CASE("rem") {
|
| - m_token = REMS;
|
| - }
|
| - CASE("s") {
|
| - m_token = SECS;
|
| - }
|
| - CASE("turn") {
|
| - m_token = TURNS;
|
| - }
|
| - CASE("vw") {
|
| - m_token = VW;
|
| - }
|
| - CASE("vh") {
|
| - m_token = VH;
|
| - }
|
| - CASE("vmin") {
|
| - m_token = VMIN;
|
| - }
|
| - CASE("vmax") {
|
| - m_token = VMAX;
|
| - }
|
| - CASE("__qem") {
|
| - m_token = QEMS;
|
| - }
|
| - }
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline void CSSTokenizer::detectDashToken(int length)
|
| -{
|
| - CharacterType* name = tokenStart<CharacterType>();
|
| -
|
| - // Ignore leading dash.
|
| - ++name;
|
| - --length;
|
| -
|
| - SWITCH(name, length) {
|
| - CASE("webkit-any") {
|
| - m_token = ANYFUNCTION;
|
| - }
|
| - CASE("webkit-calc") {
|
| - m_token = CALCFUNCTION;
|
| - }
|
| - }
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline void CSSTokenizer::detectAtToken(int length, bool hasEscape)
|
| -{
|
| - CharacterType* name = tokenStart<CharacterType>();
|
| - ASSERT(name[0] == '@' && length >= 2);
|
| -
|
| - // Ignore leading @.
|
| - ++name;
|
| - --length;
|
| -
|
| - // charset, font-face, import, media, namespace, page, supports,
|
| - // -webkit-keyframes, keyframes, and -webkit-mediaquery are not affected by hasEscape.
|
| - SWITCH(name, length) {
|
| - CASE("bottom-left") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = BOTTOMLEFT_SYM;
|
| - }
|
| - CASE("bottom-right") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = BOTTOMRIGHT_SYM;
|
| - }
|
| - CASE("bottom-center") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = BOTTOMCENTER_SYM;
|
| - }
|
| - CASE("bottom-left-corner") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = BOTTOMLEFTCORNER_SYM;
|
| - }
|
| - CASE("bottom-right-corner") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = BOTTOMRIGHTCORNER_SYM;
|
| - }
|
| - CASE("charset") {
|
| - if (name - 1 == dataStart<CharacterType>())
|
| - m_token = CHARSET_SYM;
|
| - }
|
| - CASE("font-face") {
|
| - m_token = FONT_FACE_SYM;
|
| - }
|
| - CASE("import") {
|
| - m_parsingMode = MediaQueryMode;
|
| - m_token = IMPORT_SYM;
|
| - }
|
| - CASE("keyframes") {
|
| - if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled())
|
| - m_token = KEYFRAMES_SYM;
|
| - }
|
| - CASE("left-top") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = LEFTTOP_SYM;
|
| - }
|
| - CASE("left-middle") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = LEFTMIDDLE_SYM;
|
| - }
|
| - CASE("left-bottom") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = LEFTBOTTOM_SYM;
|
| - }
|
| - CASE("media") {
|
| - m_parsingMode = MediaQueryMode;
|
| - m_token = MEDIA_SYM;
|
| - }
|
| - CASE("namespace") {
|
| - m_token = NAMESPACE_SYM;
|
| - }
|
| - CASE("page") {
|
| - m_token = PAGE_SYM;
|
| - }
|
| - CASE("right-top") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = RIGHTTOP_SYM;
|
| - }
|
| - CASE("right-middle") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = RIGHTMIDDLE_SYM;
|
| - }
|
| - CASE("right-bottom") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = RIGHTBOTTOM_SYM;
|
| - }
|
| - CASE("supports") {
|
| - m_parsingMode = SupportsMode;
|
| - m_token = SUPPORTS_SYM;
|
| - }
|
| - CASE("top-left") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = TOPLEFT_SYM;
|
| - }
|
| - CASE("top-right") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = TOPRIGHT_SYM;
|
| - }
|
| - CASE("top-center") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = TOPCENTER_SYM;
|
| - }
|
| - CASE("top-left-corner") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = TOPLEFTCORNER_SYM;
|
| - }
|
| - CASE("top-right-corner") {
|
| - if (LIKELY(!hasEscape))
|
| - m_token = TOPRIGHTCORNER_SYM;
|
| - }
|
| - CASE("viewport") {
|
| - m_token = VIEWPORT_RULE_SYM;
|
| - }
|
| - CASE("-internal-rule") {
|
| - if (LIKELY(!hasEscape && m_internal))
|
| - m_token = INTERNAL_RULE_SYM;
|
| - }
|
| - CASE("-internal-decls") {
|
| - if (LIKELY(!hasEscape && m_internal))
|
| - m_token = INTERNAL_DECLS_SYM;
|
| - }
|
| - CASE("-internal-value") {
|
| - if (LIKELY(!hasEscape && m_internal))
|
| - m_token = INTERNAL_VALUE_SYM;
|
| - }
|
| - CASE("-webkit-keyframes") {
|
| - m_token = WEBKIT_KEYFRAMES_SYM;
|
| - }
|
| - CASE("-internal-selector") {
|
| - if (LIKELY(!hasEscape && m_internal))
|
| - m_token = INTERNAL_SELECTOR_SYM;
|
| - }
|
| - CASE("-internal-medialist") {
|
| - if (!m_internal)
|
| - return;
|
| - m_parsingMode = MediaQueryMode;
|
| - m_token = INTERNAL_MEDIALIST_SYM;
|
| - }
|
| - CASE("-internal-keyframe-rule") {
|
| - if (LIKELY(!hasEscape && m_internal))
|
| - m_token = INTERNAL_KEYFRAME_RULE_SYM;
|
| - }
|
| - CASE("-internal-keyframe-key-list") {
|
| - if (!m_internal)
|
| - return;
|
| - m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM;
|
| - }
|
| - CASE("-internal-supports-condition") {
|
| - if (!m_internal)
|
| - return;
|
| - m_parsingMode = SupportsMode;
|
| - m_token = INTERNAL_SUPPORTS_CONDITION_SYM;
|
| - }
|
| - }
|
| -}
|
| -
|
| -template <typename CharacterType>
|
| -inline void CSSTokenizer::detectSupportsToken(int length)
|
| -{
|
| - ASSERT(m_parsingMode == SupportsMode);
|
| - CharacterType* name = tokenStart<CharacterType>();
|
| -
|
| - SWITCH(name, length) {
|
| - CASE("or") {
|
| - m_token = SUPPORTS_OR;
|
| - }
|
| - CASE("and") {
|
| - m_token = SUPPORTS_AND;
|
| - }
|
| - CASE("not") {
|
| - m_token = SUPPORTS_NOT;
|
| - }
|
| - }
|
| -}
|
| -
|
| -template <typename SrcCharacterType>
|
| -int CSSTokenizer::realLex(void* yylvalWithoutType)
|
| -{
|
| - YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType);
|
| - // Write pointer for the next character.
|
| - SrcCharacterType* result;
|
| - CSSParserString resultString;
|
| - bool hasEscape;
|
| -
|
| - // The input buffer is terminated by a \0 character, so
|
| - // it is safe to read one character ahead of a known non-null.
|
| -#if ENABLE(ASSERT)
|
| - // In debug we check with an ASSERT that the length is > 0 for string types.
|
| - yylval->string.clear();
|
| -#endif
|
| -
|
| -restartAfterComment:
|
| - result = currentCharacter<SrcCharacterType>();
|
| - setTokenStart(result);
|
| - m_tokenStartLineNumber = m_lineNumber;
|
| - m_token = *currentCharacter<SrcCharacterType>();
|
| - ++currentCharacter<SrcCharacterType>();
|
| -
|
| - switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdentifierStart) {
|
| - case CharacterCaselessU:
|
| - if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) {
|
| - if (parseUnicodeRange<SrcCharacterType>()) {
|
| - m_token = UNICODERANGE;
|
| - yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
|
| - break;
|
| - }
|
| - }
|
| - // Fall through to CharacterIdentifierStart.
|
| -
|
| - case CharacterIdentifierStart:
|
| - --currentCharacter<SrcCharacterType>();
|
| - parseIdentifier(result, yylval->string, hasEscape);
|
| - m_token = IDENT;
|
| -
|
| - if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) {
|
| - if (m_parsingMode == SupportsMode && !hasEscape) {
|
| - detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
|
| - if (m_token != IDENT)
|
| - break;
|
| - }
|
| -
|
| - m_token = FUNCTION;
|
| - if (!hasEscape)
|
| - detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
|
| -
|
| - // Skip parenthesis
|
| - ++currentCharacter<SrcCharacterType>();
|
| - ++result;
|
| - ++yylval->string.m_length;
|
| -
|
| - if (m_token == URI) {
|
| - m_token = FUNCTION;
|
| - // Check whether it is really an URI.
|
| - if (yylval->string.is8Bit())
|
| - parseURI<LChar>(yylval->string);
|
| - else
|
| - parseURI<UChar>(yylval->string);
|
| - }
|
| - } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) {
|
| - if (m_parsingMode == MediaQueryMode) {
|
| - detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
|
| - } else if (m_parsingMode == SupportsMode) {
|
| - detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
|
| - } else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[0], 'n')) {
|
| - if (result - tokenStart<SrcCharacterType>() == 1) {
|
| - // String "n" is IDENT but "n+1" is NTH.
|
| - if (parseNthChildExtra<SrcCharacterType>()) {
|
| - m_token = NTH;
|
| - yylval->string.m_length = currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>();
|
| - }
|
| - } else if (result - tokenStart<SrcCharacterType>() >= 2 && tokenStart<SrcCharacterType>()[1] == '-') {
|
| - // String "n-" is IDENT but "n-1" is NTH.
|
| - // Set currentCharacter to '-' to continue parsing.
|
| - SrcCharacterType* nextCharacter = result;
|
| - currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 1;
|
| - if (parseNthChildExtra<SrcCharacterType>()) {
|
| - m_token = NTH;
|
| - yylval->string.setLength(currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
|
| - } else {
|
| - // Revert the change to currentCharacter if unsuccessful.
|
| - currentCharacter<SrcCharacterType>() = nextCharacter;
|
| - }
|
| - }
|
| - }
|
| - }
|
| - break;
|
| -
|
| - case CharacterDot:
|
| - if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0]))
|
| - break;
|
| - // Fall through to CharacterNumber.
|
| -
|
| - case CharacterNumber: {
|
| - bool dotSeen = (m_token == '.');
|
| -
|
| - while (true) {
|
| - if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) {
|
| - // Only one dot is allowed for a number,
|
| - // and it must be followed by a digit.
|
| - if (currentCharacter<SrcCharacterType>()[0] != '.' || dotSeen || !isASCIIDigit(currentCharacter<SrcCharacterType>()[1]))
|
| - break;
|
| - dotSeen = true;
|
| - }
|
| - ++currentCharacter<SrcCharacterType>();
|
| - }
|
| -
|
| - if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaCaselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) {
|
| - // "[0-9]+n" is always an NthChild.
|
| - ++currentCharacter<SrcCharacterType>();
|
| - parseNthChildExtra<SrcCharacterType>();
|
| - m_token = NTH;
|
| - yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
|
| - break;
|
| - }
|
| -
|
| - // Use SVG parser for numbers on SVG presentation attributes.
|
| - if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) {
|
| - // We need to take care of units like 'em' or 'ex'.
|
| - SrcCharacterType* character = currentCharacter<SrcCharacterType>();
|
| - if (isASCIIAlphaCaselessEqual(*character, 'e')) {
|
| - ASSERT(character - tokenStart<SrcCharacterType>() > 0);
|
| - ++character;
|
| - if (*character == '-' || *character == '+' || isASCIIDigit(*character)) {
|
| - ++character;
|
| - while (isASCIIDigit(*character))
|
| - ++character;
|
| - // Use FLOATTOKEN if the string contains exponents.
|
| - dotSeen = true;
|
| - currentCharacter<SrcCharacterType>() = character;
|
| - }
|
| - }
|
| - if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - tokenStart<SrcCharacterType>(), yylval->number))
|
| - break;
|
| - } else {
|
| - yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
|
| - }
|
| -
|
| - // Type of the function.
|
| - if (isIdentifierStart<SrcCharacterType>()) {
|
| - SrcCharacterType* type = currentCharacter<SrcCharacterType>();
|
| - result = currentCharacter<SrcCharacterType>();
|
| -
|
| - parseIdentifier(result, resultString, hasEscape);
|
| -
|
| - m_token = DIMEN;
|
| - if (!hasEscape)
|
| - detectNumberToken(type, currentCharacter<SrcCharacterType>() - type);
|
| -
|
| - if (m_token == DIMEN) {
|
| - // The decoded number is overwritten, but this is intentional.
|
| - yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
|
| - }
|
| - } else if (*currentCharacter<SrcCharacterType>() == '%') {
|
| - // Although the CSS grammar says {num}% we follow
|
| - // webkit at the moment which uses {num}%+.
|
| - do {
|
| - ++currentCharacter<SrcCharacterType>();
|
| - } while (*currentCharacter<SrcCharacterType>() == '%');
|
| - m_token = PERCENTAGE;
|
| - } else {
|
| - m_token = dotSeen ? FLOATTOKEN : INTEGER;
|
| - }
|
| - break;
|
| - }
|
| -
|
| - case CharacterDash:
|
| - if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) {
|
| - --currentCharacter<SrcCharacterType>();
|
| - parseIdentifier(result, resultString, hasEscape);
|
| - m_token = IDENT;
|
| -
|
| - if (*currentCharacter<SrcCharacterType>() == '(') {
|
| - m_token = FUNCTION;
|
| - if (!hasEscape)
|
| - detectDashToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
|
| - ++currentCharacter<SrcCharacterType>();
|
| - ++result;
|
| - } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) {
|
| - if (result - tokenStart<SrcCharacterType>() == 2) {
|
| - // String "-n" is IDENT but "-n+1" is NTH.
|
| - if (parseNthChildExtra<SrcCharacterType>()) {
|
| - m_token = NTH;
|
| - result = currentCharacter<SrcCharacterType>();
|
| - }
|
| - } else if (result - tokenStart<SrcCharacterType>() >= 3 && tokenStart<SrcCharacterType>()[2] == '-') {
|
| - // String "-n-" is IDENT but "-n-1" is NTH.
|
| - // Set currentCharacter to second '-' of '-n-' to continue parsing.
|
| - SrcCharacterType* nextCharacter = result;
|
| - currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 2;
|
| - if (parseNthChildExtra<SrcCharacterType>()) {
|
| - m_token = NTH;
|
| - result = currentCharacter<SrcCharacterType>();
|
| - } else {
|
| - // Revert the change to currentCharacter if unsuccessful.
|
| - currentCharacter<SrcCharacterType>() = nextCharacter;
|
| - }
|
| - }
|
| - }
|
| - resultString.setLength(result - tokenStart<SrcCharacterType>());
|
| - yylval->string = resultString;
|
| - } else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentCharacter<SrcCharacterType>()[1] == '>') {
|
| - currentCharacter<SrcCharacterType>() += 2;
|
| - m_token = SGML_CD;
|
| - } else if (UNLIKELY(m_parsingMode == NthChildMode)) {
|
| - // "-[0-9]+n" is always an NthChild.
|
| - if (parseNthChild<SrcCharacterType>()) {
|
| - parseNthChildExtra<SrcCharacterType>();
|
| - m_token = NTH;
|
| - yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
|
| - }
|
| - }
|
| - break;
|
| -
|
| - case CharacterOther:
|
| - // m_token is simply the current character.
|
| - break;
|
| -
|
| - case CharacterNull:
|
| - // Do not advance pointer at the end of input.
|
| - --currentCharacter<SrcCharacterType>();
|
| - break;
|
| -
|
| - case CharacterWhiteSpace:
|
| - m_token = WHITESPACE;
|
| - // Might start with a '\n'.
|
| - --currentCharacter<SrcCharacterType>();
|
| - do {
|
| - if (*currentCharacter<SrcCharacterType>() == '\n')
|
| - ++m_lineNumber;
|
| - ++currentCharacter<SrcCharacterType>();
|
| - } while (*currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICharacters[*currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace));
|
| - break;
|
| -
|
| - case CharacterEndMediaQueryOrSupports:
|
| - if (m_parsingMode == MediaQueryMode || m_parsingMode == SupportsMode)
|
| - m_parsingMode = NormalMode;
|
| - break;
|
| -
|
| - case CharacterEndNthChild:
|
| - if (m_parsingMode == NthChildMode)
|
| - m_parsingMode = NormalMode;
|
| - break;
|
| -
|
| - case CharacterQuote:
|
| - if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token, AbortIfInvalid)) {
|
| - ++result;
|
| - parseString<SrcCharacterType>(result, yylval->string, m_token);
|
| - m_token = STRING;
|
| - }
|
| - break;
|
| -
|
| - case CharacterExclamationMark: {
|
| - SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterType>());
|
| - if (isEqualToCSSIdentifier(start, "important")) {
|
| - m_token = IMPORTANT_SYM;
|
| - currentCharacter<SrcCharacterType>() = start + 9;
|
| - }
|
| - break;
|
| - }
|
| -
|
| - case CharacterHashmark: {
|
| - SrcCharacterType* start = currentCharacter<SrcCharacterType>();
|
| - result = currentCharacter<SrcCharacterType>();
|
| -
|
| - if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) {
|
| - // This must be a valid hex number token.
|
| - do {
|
| - ++currentCharacter<SrcCharacterType>();
|
| - } while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>()));
|
| - m_token = HEX;
|
| - yylval->string.init(start, currentCharacter<SrcCharacterType>() - start);
|
| - } else if (isIdentifierStart<SrcCharacterType>()) {
|
| - m_token = IDSEL;
|
| - parseIdentifier(result, yylval->string, hasEscape);
|
| - if (!hasEscape) {
|
| - // Check whether the identifier is also a valid hex number.
|
| - SrcCharacterType* current = start;
|
| - m_token = HEX;
|
| - do {
|
| - if (!isASCIIHexDigit(*current)) {
|
| - m_token = IDSEL;
|
| - break;
|
| - }
|
| - ++current;
|
| - } while (current < result);
|
| - }
|
| - }
|
| - break;
|
| - }
|
| -
|
| - case CharacterSlash:
|
| - // Ignore comments. They are not even considered as white spaces.
|
| - if (*currentCharacter<SrcCharacterType>() == '*') {
|
| - const CSSParserLocation startLocation = currentLocation();
|
| - if (m_parser.m_observer) {
|
| - unsigned startOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>() - 1; // Start with a slash.
|
| - m_parser.m_observer->startComment(startOffset - m_parsedTextPrefixLength);
|
| - }
|
| - ++currentCharacter<SrcCharacterType>();
|
| - while (currentCharacter<SrcCharacterType>()[0] != '*' || currentCharacter<SrcCharacterType>()[1] != '/') {
|
| - if (*currentCharacter<SrcCharacterType>() == '\n')
|
| - ++m_lineNumber;
|
| - if (*currentCharacter<SrcCharacterType>() == '\0') {
|
| - // Unterminated comments are simply ignored.
|
| - currentCharacter<SrcCharacterType>() -= 2;
|
| - m_parser.reportError(startLocation, UnterminatedCommentCSSError);
|
| - break;
|
| - }
|
| - ++currentCharacter<SrcCharacterType>();
|
| - }
|
| - currentCharacter<SrcCharacterType>() += 2;
|
| - if (m_parser.m_observer) {
|
| - unsigned endOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>();
|
| - unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength);
|
| - m_parser.m_observer->endComment(std::min(endOffset, userTextEndOffset) - m_parsedTextPrefixLength);
|
| - }
|
| - goto restartAfterComment;
|
| - }
|
| - break;
|
| -
|
| - case CharacterDollar:
|
| - if (*currentCharacter<SrcCharacterType>() == '=') {
|
| - ++currentCharacter<SrcCharacterType>();
|
| - m_token = ENDSWITH;
|
| - }
|
| - break;
|
| -
|
| - case CharacterAsterisk:
|
| - if (*currentCharacter<SrcCharacterType>() == '=') {
|
| - ++currentCharacter<SrcCharacterType>();
|
| - m_token = CONTAINS;
|
| - }
|
| - break;
|
| -
|
| - case CharacterPlus:
|
| - if (UNLIKELY(m_parsingMode == NthChildMode)) {
|
| - // Simplest case. "+[0-9]*n" is always NthChild.
|
| - if (parseNthChild<SrcCharacterType>()) {
|
| - parseNthChildExtra<SrcCharacterType>();
|
| - m_token = NTH;
|
| - yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
|
| - }
|
| - }
|
| - break;
|
| -
|
| - case CharacterLess:
|
| - if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<SrcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-') {
|
| - currentCharacter<SrcCharacterType>() += 3;
|
| - m_token = SGML_CD;
|
| - }
|
| - break;
|
| -
|
| - case CharacterAt:
|
| - if (isIdentifierStart<SrcCharacterType>()) {
|
| - m_token = ATKEYWORD;
|
| - ++result;
|
| - parseIdentifier(result, resultString, hasEscape);
|
| - // The standard enables unicode escapes in at-rules. In this case only the resultString will contain the
|
| - // correct identifier, hence we have to use it to determine its length instead of the usual pointer arithmetic.
|
| - detectAtToken<SrcCharacterType>(resultString.length() + 1, hasEscape);
|
| - }
|
| - break;
|
| -
|
| - case CharacterBackSlash:
|
| - if (isCSSEscape(*currentCharacter<SrcCharacterType>())) {
|
| - --currentCharacter<SrcCharacterType>();
|
| - parseIdentifier(result, yylval->string, hasEscape);
|
| - m_token = IDENT;
|
| - }
|
| - break;
|
| -
|
| - case CharacterXor:
|
| - if (*currentCharacter<SrcCharacterType>() == '=') {
|
| - ++currentCharacter<SrcCharacterType>();
|
| - m_token = BEGINSWITH;
|
| - }
|
| - break;
|
| -
|
| - case CharacterVerticalBar:
|
| - if (*currentCharacter<SrcCharacterType>() == '=') {
|
| - ++currentCharacter<SrcCharacterType>();
|
| - m_token = DASHMATCH;
|
| - }
|
| - break;
|
| -
|
| - case CharacterTilde:
|
| - if (*currentCharacter<SrcCharacterType>() == '=') {
|
| - ++currentCharacter<SrcCharacterType>();
|
| - m_token = INCLUDES;
|
| - }
|
| - break;
|
| -
|
| - default:
|
| - ASSERT_NOT_REACHED();
|
| - break;
|
| - }
|
| -
|
| - return m_token;
|
| -}
|
| -
|
| -template <>
|
| -inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart)
|
| -{
|
| - m_tokenStart.ptr8 = tokenStart;
|
| -}
|
| -
|
| -template <>
|
| -inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart)
|
| -{
|
| - m_tokenStart.ptr16 = tokenStart;
|
| -}
|
| -
|
| -void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, const String& string, const char* suffix, unsigned suffixLength)
|
| -{
|
| - m_parsedTextPrefixLength = prefixLength;
|
| - m_parsedTextSuffixLength = suffixLength;
|
| - unsigned stringLength = string.length();
|
| - unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuffixLength + 1;
|
| - m_length = length;
|
| -
|
| - if (!stringLength || string.is8Bit()) {
|
| - m_dataStart8 = adoptArrayPtr(new LChar[length]);
|
| - for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
|
| - m_dataStart8[i] = prefix[i];
|
| -
|
| - if (stringLength)
|
| - memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.characters8(), stringLength * sizeof(LChar));
|
| -
|
| - unsigned start = m_parsedTextPrefixLength + stringLength;
|
| - unsigned end = start + suffixLength;
|
| - for (unsigned i = start; i < end; i++)
|
| - m_dataStart8[i] = suffix[i - start];
|
| -
|
| - m_dataStart8[length - 1] = 0;
|
| -
|
| - m_is8BitSource = true;
|
| - m_currentCharacter8 = m_dataStart8.get();
|
| - m_currentCharacter16 = 0;
|
| - setTokenStart<LChar>(m_currentCharacter8);
|
| - m_lexFunc = &CSSTokenizer::realLex<LChar>;
|
| - return;
|
| - }
|
| -
|
| - m_dataStart16 = adoptArrayPtr(new UChar[length]);
|
| - for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
|
| - m_dataStart16[i] = prefix[i];
|
| -
|
| - ASSERT(stringLength);
|
| - memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16(), stringLength * sizeof(UChar));
|
| -
|
| - unsigned start = m_parsedTextPrefixLength + stringLength;
|
| - unsigned end = start + suffixLength;
|
| - for (unsigned i = start; i < end; i++)
|
| - m_dataStart16[i] = suffix[i - start];
|
| -
|
| - m_dataStart16[length - 1] = 0;
|
| -
|
| - m_is8BitSource = false;
|
| - m_currentCharacter8 = 0;
|
| - m_currentCharacter16 = m_dataStart16.get();
|
| - setTokenStart<UChar>(m_currentCharacter16);
|
| - m_lexFunc = &CSSTokenizer::realLex<UChar>;
|
| -}
|
| -
|
| -} // namespace blink
|
|
|