| Index: Source/core/css/parser/MediaQueryTokenizer.cpp
|
| diff --git a/Source/core/css/parser/MediaQueryTokenizer.cpp b/Source/core/css/parser/MediaQueryTokenizer.cpp
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..b478799695e22cbd1b0630aa1b2693bcd382375b
|
| --- /dev/null
|
| +++ b/Source/core/css/parser/MediaQueryTokenizer.cpp
|
| @@ -0,0 +1,403 @@
|
| +/*
|
| + * Copyright (C) 2013 Google Inc. All rights reserved.
|
| + *
|
| + * Redistribution and use in source and binary forms, with or without
|
| + * modification, are permitted provided that the following conditions are
|
| + * met:
|
| + *
|
| + * * Redistributions of source code must retain the above copyright
|
| + * notice, this list of conditions and the following disclaimer.
|
| + * * Redistributions in binary form must reproduce the above
|
| + * copyright notice, this list of conditions and the following disclaimer
|
| + * in the documentation and/or other materials provided with the
|
| + * distribution.
|
| + * * Neither the name of Google Inc. nor the names of its
|
| + * contributors may be used to endorse or promote products derived from
|
| + * this software without specific prior written permission.
|
| + *
|
| + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| + */
|
| +
|
| +#include "config.h"
|
| +#include "core/css/parser/MediaQueryTokenizer.h"
|
| +
|
| +#include "core/css/parser/CSSInputStream.h"
|
| +#include "core/html/parser/HTMLParserIdioms.h"
|
| +#include "wtf/unicode/CharacterNames.h"
|
| +#include <cfloat>
|
| +
|
| +namespace WebCore {
|
| +
|
| +// Returns true for code points that may begin a name: ASCII letters, the
|
| +// underscore, and every non-ASCII code point.
|
| +// http://dev.w3.org/csswg/css-syntax/#name-start-code-point
|
| +static bool isNameStart(UChar c)
|
| +{
|
| +    return isASCIIAlpha(c) || c == '_' || !isASCII(c);
|
| +}
|
| +
|
| +// Returns true for code points that may appear inside a name: everything
|
| +// isNameStart() accepts, plus ASCII digits and the hyphen-minus.
|
| +// http://www.w3.org/TR/css-syntax-3/#name-code-point
|
| +static bool isNameChar(UChar c)
|
| +{
|
| +    if (c == '-' || isASCIIDigit(c))
|
| +        return true;
|
| +    return isNameStart(c);
|
| +}
|
| +
|
| +// Returns true when |first| is a backslash and |second| is neither a newline
|
| +// nor the end-of-file marker, i.e. the pair starts a valid escape.
|
| +// http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escape
|
| +static bool twoCharsAreValidEscape(UChar first, UChar second)
|
| +{
|
| +    if (first != '\\')
|
| +        return false;
|
| +    return second != '\n' && second != kEndOfFileMarker;
|
| +}
|
| +
|
| +// Nothing to set up here: nextToken() points m_input at the stream it is
|
| +// given before any character is read.
|
| +MediaQueryTokenizer::MediaQueryTokenizer()
|
| +{
|
| +}
|
| +
|
| +// Hands |c| back to the input stream (via pushBack) so that it is the next
|
| +// character to be read again.
|
| +void MediaQueryTokenizer::reconsume(UChar c)
|
| +{
|
| +    m_input->pushBack(c);
|
| +}
|
| +
|
| +// Reads the character at the current input position, advances the stream
|
| +// past it, and returns the character that was read.
|
| +UChar MediaQueryTokenizer::consume()
|
| +{
|
| +    const UChar consumed = m_input->currentInputChar();
|
| +    m_input->advance();
|
| +    return consumed;
|
| +}
|
| +
|
| +// Advances the input stream by |offset| without returning what was skipped.
|
| +void MediaQueryTokenizer::consume(unsigned offset)
|
| +{
|
| +    m_input->advance(offset);
|
| +}
|
| +
|
| +// Collapses a run of whitespace into a single WhitespaceToken. |cc| is not
|
| +// used; any further whitespace at the front of the input is skipped.
|
| +CSSToken MediaQueryTokenizer::whiteSpace(UChar cc)
|
| +{
|
| +    // Tokenization is lossy at the moment: the skipped characters are
|
| +    // discarded, although they could be recorded here instead.
|
| +    consumeUntilNotWhitespace();
|
| +    return CSSToken(WhitespaceToken);
|
| +}
|
| +
|
| +// Produces a LeftParenToken. |cc| is not used.
|
| +CSSToken MediaQueryTokenizer::leftParen(UChar cc)
|
| +{
|
| +    return CSSToken(LeftParenToken);
|
| +}
|
| +
|
| +// Produces a RightParenToken. |cc| is not used.
|
| +CSSToken MediaQueryTokenizer::rightParen(UChar cc)
|
| +{
|
| +    return CSSToken(RightParenToken);
|
| +}
|
| +
|
| +// Token handler for an already-consumed |cc| (a '+' or a '.', going by the
|
| +// handler's name). Returns a numeric token when |cc| together with the
|
| +// characters that follow starts a number, and a DelimToken carrying |cc|
|
| +// otherwise.
|
| +CSSToken MediaQueryTokenizer::plusOrFullStop(UChar cc)
|
| +{
|
| +    // nextCharsAreNumber() inspects the stream from the current position, so
|
| +    // |cc| has to be pushed back before asking. Checking only the characters
|
| +    // after |cc| would accept inputs such as "++5" or ".+5" and hand
|
| +    // consumeNumber() a sequence that does not start a number.
|
| +    reconsume(cc);
|
| +    if (nextCharsAreNumber())
|
| +        return consumeNumericToken();
|
| +
|
| +    // Not a number after all: take |cc| off the stream again.
|
| +    consume();
|
| +    return CSSToken(DelimToken, cc);
|
| +}
|
| +
|
| +// Produces a CommaToken. |cc| is not used.
|
| +CSSToken MediaQueryTokenizer::comma(UChar cc)
|
| +{
|
| +    return CSSToken(CommaToken);
|
| +}
|
| +
|
| +// Token handler for an already-consumed |cc| (a '-', going by the handler's
|
| +// name). Tries, in order: a numeric token, an ident-like token, and finally a
|
| +// DelimToken carrying |cc|.
|
| +CSSToken MediaQueryTokenizer::hyphenMinus(UChar cc)
|
| +{
|
| +    // nextCharsAreNumber() inspects the stream from the current position, so
|
| +    // |cc| has to be pushed back before asking. Checking only the characters
|
| +    // after |cc| would accept inputs such as "--5" or "-+5" and hand
|
| +    // consumeNumber() a sequence that does not start a number.
|
| +    reconsume(cc);
|
| +    if (nextCharsAreNumber())
|
| +        return consumeNumericToken();
|
| +
|
| +    // The identifier check is intentionally left as it was: it looks at what
|
| +    // follows |cc|, so |cc| is taken off the stream again first.
|
| +    consume();
|
| +    if (nextCharsAreIdentifier()) {
|
| +        reconsume(cc);
|
| +        return consumeIdentLikeToken();
|
| +    }
|
| +    return CSSToken(DelimToken, cc);
|
| +}
|
| +
|
| +// Produces a DelimToken that carries |cc|.
|
| +CSSToken MediaQueryTokenizer::solidus(UChar cc)
|
| +{
|
| +    return CSSToken(DelimToken, cc);
|
| +}
|
| +
|
| +// Produces a ColonToken. |cc| is not used.
|
| +CSSToken MediaQueryTokenizer::colon(UChar cc)
|
| +{
|
| +    return CSSToken(ColonToken);
|
| +}
|
| +
|
| +// Produces a SemicolonToken. |cc| is not used.
|
| +CSSToken MediaQueryTokenizer::semiColon(UChar cc)
|
| +{
|
| +    return CSSToken(SemicolonToken);
|
| +}
|
| +
|
| +// Token handler for an already-consumed |cc|. When |cc| and the character at
|
| +// the current input position form a valid escape, |cc| is pushed back and an
|
| +// ident-like token is consumed; otherwise |cc| becomes a DelimToken.
|
| +CSSToken MediaQueryTokenizer::reverseSolidus(UChar cc)
|
| +{
|
| +    if (!twoCharsAreValidEscape(cc, m_input->currentInputChar()))
|
| +        return CSSToken(DelimToken, cc);
|
| +
|
| +    reconsume(cc);
|
| +    return consumeIdentLikeToken();
|
| +}
|
| +
|
| +// Pushes |cc| back onto the input and consumes a numeric token starting
|
| +// with it.
|
| +CSSToken MediaQueryTokenizer::asciiDigit(UChar cc)
|
| +{
|
| +    reconsume(cc);
|
| +    return consumeNumericToken();
|
| +}
|
| +
|
| +// Pushes |cc| back onto the input and consumes an ident-like token starting
|
| +// with it. nextToken() also uses this handler for every non-ASCII character.
|
| +CSSToken MediaQueryTokenizer::nameStart(UChar cc)
|
| +{
|
| +    reconsume(cc);
|
| +    return consumeIdentLikeToken();
|
| +}
|
| +
|
| +// Produces an EOFToken. |cc| is not used.
|
| +CSSToken MediaQueryTokenizer::endOfFile(UChar cc)
|
| +{
|
| +    return CSSToken(EOFToken);
|
| +}
|
| +
|
| +// Tokenizes |string| and appends every token to |outTokens|. The last token
|
| +// appended is always the EOFToken.
|
| +void MediaQueryTokenizer::tokenize(String string, Vector<CSSToken>& outTokens)
|
| +{
|
| +    // The spec asks for an input preprocessing step at this point, see
|
| +    // http://www.w3.org/TR/css-syntax-3/#input-preprocessing
|
| +    //
|
| +    // It is skipped here because:
|
| +    // * HTML spaces are used, which accept \r and \f as valid white space
|
| +    // * White spaces are not counted
|
| +    // * consumeEscape replaces NULLs with replacement characters
|
| +    MediaQueryTokenizer tokenizer;
|
| +    CSSInputStream input(string);
|
| +    do {
|
| +        outTokens.append(tokenizer.nextToken(input));
|
| +    } while (outTokens.last().type() != EOFToken);
|
| +}
|
| +
|
| +// Consumes one character from |input| and dispatches on it to produce the
|
| +// next token. ASCII characters are looked up in the getCodePoints() handler
|
| +// table; every non-ASCII character is treated as the start of a name. An
|
| +// ASCII character without a handler becomes a DelimToken.
|
| +CSSToken MediaQueryTokenizer::nextToken(CSSInputStream& input)
|
| +{
|
| +    // The CSS Syntax spec, unlike the HTMLTokenizer, describes a stateless
|
| +    // tokenizer with fixed-size look-ahead. A stateful model with dedicated
|
| +    // states for the "next 3 codepoints are X" cases would also work, and
|
| +    // state machines make incremental tokenization of partial sources easier
|
| +    // to write. For now the spec is followed exactly.
|
| +    m_input = &input;
|
| +    const UChar cc = consume();
|
| +
|
| +    CodePoint handler = &MediaQueryTokenizer::nameStart;
|
| +    if (isASCII(cc)) {
|
| +        ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM);
|
| +        handler = getCodePoints()->codePoints[cc];
|
| +    }
|
| +
|
| +    if (!handler)
|
| +        return CSSToken(DelimToken, cc);
|
| +
|
| +    return (this->*handler)(cc);
|
| +}
|
| +
|
| +// Consumes a number from the input and converts it to a double in one pass.
|
| +// This method merges the following spec sections for efficiency
|
| +// http://www.w3.org/TR/css3-syntax/#consume-a-number
|
| +// http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number
|
| +//
|
| +// The extent of the number is first measured with peek offsets (optional
|
| +// sign, integer digits, optional ".digits" fraction, optional exponent) and
|
| +// only then is the input advanced past it. The returned NumberToken is
|
| +// flagged NumberValueType when a fraction or an exponent is present and
|
| +// IntegerValueType otherwise.
|
| +CSSToken MediaQueryTokenizer::consumeNumber()
|
| +{
|
| +    ASSERT(nextCharsAreNumber());
|
| +    NumericValueType type = IntegerValueType;
|
| +    unsigned peekOffset = 0;
|
| +
|
| +    // Optional sign.
|
| +    int sign = 1;
|
| +    if (m_input->currentInputChar() == '+') {
|
| +        ++peekOffset;
|
| +    } else if (m_input->peek(peekOffset) == '-') {
|
| +        sign = -1;
|
| +        ++peekOffset;
|
| +    }
|
| +
|
| +    // Integer digits (possibly none, as in ".5").
|
| +    const unsigned intStartPos = peekOffset;
|
| +    peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
|
| +    const unsigned intEndPos = peekOffset;
|
| +
|
| +    // Fraction. The character after the '.' is inspected without moving
|
| +    // peekOffset, so a '.' that is not followed by a digit stays in the input
|
| +    // instead of being swallowed as part of the number.
|
| +    unsigned fractionStartPos = 0;
|
| +    unsigned fractionEndPos = 0;
|
| +    if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(peekOffset + 1))) {
|
| +        type = NumberValueType;
|
| +        fractionStartPos = peekOffset + 1;
|
| +        peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(fractionStartPos);
|
| +        fractionEndPos = peekOffset;
|
| +    }
|
| +
|
| +    // Exponent. It is only accepted when at least one digit follows the
|
| +    // marker and its optional sign; otherwise the 'e'/'E' is left in the
|
| +    // input (it may start a unit such as "em").
|
| +    int exponentSign = 1;
|
| +    unsigned exponentStartPos = 0;
|
| +    unsigned exponentEndPos = 0;
|
| +    const UChar exponentMarker = m_input->peek(peekOffset);
|
| +    if (exponentMarker == 'E' || exponentMarker == 'e') {
|
| +        unsigned exponentOffset = peekOffset + 1;
|
| +        int candidateSign = 1;
|
| +        const UChar exponentSignChar = m_input->peek(exponentOffset);
|
| +        if (exponentSignChar == '+') {
|
| +            ++exponentOffset;
|
| +        } else if (exponentSignChar == '-') {
|
| +            candidateSign = -1;
|
| +            ++exponentOffset;
|
| +        }
|
| +        const unsigned exponentDigitsEnd = m_input->skipWhilePredicate<isASCIIDigit>(exponentOffset);
|
| +        if (exponentDigitsEnd != exponentOffset) {
|
| +            // A number written with an exponent is not an integer.
|
| +            type = NumberValueType;
|
| +            exponentSign = candidateSign;
|
| +            exponentStartPos = exponentOffset;
|
| +            exponentEndPos = exponentDigitsEnd;
|
| +            peekOffset = exponentDigitsEnd;
|
| +        }
|
| +    }
|
| +
|
| +    const unsigned long long integerPart = m_input->getNumber(intStartPos, intEndPos);
|
| +    unsigned fractionDigits = fractionEndPos - fractionStartPos;
|
| +    unsigned floatingFractionEndPos = fractionEndPos;
|
| +    if (fractionDigits > DBL_DIG) {
|
| +        // Limit the number of fraction digits, to avoid double (and fractionPart) from overflowing
|
| +        fractionDigits = DBL_DIG;
|
| +        floatingFractionEndPos = fractionStartPos + DBL_DIG;
|
| +    }
|
| +    const unsigned long long fractionPart = m_input->getNumber(fractionStartPos, floatingFractionEndPos);
|
| +    const unsigned long long exponentPart = m_input->getNumber(exponentStartPos, exponentEndPos);
|
| +    const double fractionDivisor = pow(10.0, static_cast<double>(fractionDigits));
|
| +    const double exponent = pow(10.0, static_cast<double>(exponentSign) * static_cast<double>(exponentPart));
|
| +    const double value = static_cast<double>(sign)
|
| +        * (static_cast<double>(integerPart) + static_cast<double>(fractionPart) / fractionDivisor)
|
| +        * exponent;
|
| +
|
| +    // Everything up to peekOffset belongs to the number; consume it now.
|
| +    m_input->advance(peekOffset);
|
| +
|
| +    return CSSToken(NumberToken, value, type);
|
| +}
|
| +
|
| +// Consumes a number and then decides what kind of numeric token it is: a
|
| +// dimension when an identifier follows (the identifier becomes the unit), a
|
| +// percentage when a '%' follows (the '%' is consumed), a plain number
|
| +// otherwise.
|
| +// http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token
|
| +CSSToken MediaQueryTokenizer::consumeNumericToken()
|
| +{
|
| +    CSSToken token = consumeNumber();
|
| +    if (nextCharsAreIdentifier()) {
|
| +        token.convertToDimensionWithUnit(consumeName());
|
| +        return token;
|
| +    }
|
| +    if (consumeIfNext('%'))
|
| +        token.convertToPercentage();
|
| +    return token;
|
| +}
|
| +
|
| +// Consumes a name and returns it as a FunctionToken when it is immediately
|
| +// followed by '(' (which is consumed as well), or as an IdentToken otherwise.
|
| +// http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token
|
| +CSSToken MediaQueryTokenizer::consumeIdentLikeToken()
|
| +{
|
| +    String name = consumeName();
|
| +    if (!consumeIfNext('('))
|
| +        return CSSToken(IdentToken, name);
|
| +    return CSSToken(FunctionToken, name);
|
| +}
|
| +
|
| +// Consumes characters until the one at the current input position is no
|
| +// longer whitespace.
|
| +void MediaQueryTokenizer::consumeUntilNotWhitespace()
|
| +{
|
| +    // HTML space is used rather than CSS space because the input is not
|
| +    // preprocessed.
|
| +    for (;;) {
|
| +        if (!isHTMLSpace<UChar>(m_input->currentInputChar()))
|
| +            return;
|
| +        consume();
|
| +    }
|
| +}
|
| +
|
| +// Consumes the character at the current input position when it equals
|
| +// |character|. Returns whether a character was consumed.
|
| +bool MediaQueryTokenizer::consumeIfNext(UChar character)
|
| +{
|
| +    if (m_input->currentInputChar() != character)
|
| +        return false;
|
| +
|
| +    consume();
|
| +    return true;
|
| +}
|
| +
|
| +// Consumes the longest run of name characters and escapes from the input and
|
| +// returns it as a String (empty when the input does not start with either).
|
| +// http://www.w3.org/TR/css3-syntax/#consume-a-name
|
| +String MediaQueryTokenizer::consumeName()
|
| +{
|
| +    // FIXME: Is this as efficient as it can be?
|
| +    // Escaped characters appear to make a copy unavoidable.
|
| +    Vector<UChar> name;
|
| +    for (;;) {
|
| +        if (isNameChar(m_input->currentInputChar())) {
|
| +            name.append(consume());
|
| +        } else if (nextTwoCharsAreValidEscape()) {
|
| +            // This consume() works around a spec bug: the first code point
|
| +            // has to be consumed before the escaped code point is.
|
| +            consume();
|
| +            name.append(consumeEscape());
|
| +        } else {
|
| +            break;
|
| +        }
|
| +    }
|
| +    return String(name);
|
| +}
|
| +
|
| +// Consumes an escaped code point; the caller has already consumed the
|
| +// backslash. Two forms are handled:
|
| +// * one to six hex digits, optionally followed by a single whitespace
|
| +//   character that belongs to the escape, yielding the code point they name
|
| +// * any other character, which stands for itself
|
| +// U+FFFD is returned for a hex value of zero, a surrogate, a value that does
|
| +// not fit the UChar return type, and for an escaped end-of-file marker.
|
| +// http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point
|
| +UChar MediaQueryTokenizer::consumeEscape()
|
| +{
|
| +    UChar cc = consume();
|
| +    ASSERT(cc != '\n');
|
| +    if (isASCIIHexDigit(cc)) {
|
| +        unsigned consumedHexDigits = 1;
|
| +        String hexChars;
|
| +        hexChars.append(cc);
|
| +        // Look at the next character before consuming it, so that all six
|
| +        // digits are kept and whatever follows the digits stays in the input.
|
| +        while (consumedHexDigits < 6 && isASCIIHexDigit(m_input->currentInputChar())) {
|
| +            hexChars.append(consume());
|
| +            ++consumedHexDigits;
|
| +        }
|
| +        // A single whitespace character terminating the digits is part of the
|
| +        // escape. HTML space is used since the input is not preprocessed.
|
| +        if (isHTMLSpace<UChar>(m_input->currentInputChar()))
|
| +            consume();
|
| +
|
| +        bool ok = false;
|
| +        const unsigned codePoint = hexChars.toUIntStrict(&ok, 16);
|
| +        const bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
|
| +        // Values above 0xFFFF cannot be represented in the UChar that is
|
| +        // returned; replace them rather than silently truncating.
|
| +        if (!ok || !codePoint || isSurrogate || codePoint > 0xFFFF)
|
| +            return WTF::Unicode::replacementCharacter;
|
| +        return static_cast<UChar>(codePoint);
|
| +    }
|
| +
|
| +    // Replaces NULLs with replacement characters, since we do not perform preprocessing
|
| +    if (cc == kEndOfFileMarker)
|
| +        return WTF::Unicode::replacementCharacter;
|
| +    return cc;
|
| +}
|
| +
|
| +// Returns true when the next two unconsumed code points form a valid escape,
|
| +// i.e. a backslash followed by anything other than a newline or EOF.
|
| +//
|
| +// peek(0) is the first unconsumed code point: consumeNumber() and
|
| +// nextCharsAreNumber() index the stream the same way, treating peek(1) as the
|
| +// code point after the current one. The pair to test is therefore
|
| +// (peek(0), peek(1)); testing (peek(1), peek(2)) would skip the very
|
| +// backslash that consumeName() consumes when this returns true.
|
| +bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()
|
| +{
|
| +    return twoCharsAreValidEscape(m_input->peek(0), m_input->peek(1));
|
| +}
|
| +
|
| +// Returns true when the input, starting at the current position, begins a
|
| +// number: a digit, a '.' followed by a digit, or a sign followed by either
|
| +// of those. Nothing is consumed.
|
| +// http://www.w3.org/TR/css3-syntax/#starts-with-a-number
|
| +bool MediaQueryTokenizer::nextCharsAreNumber()
|
| +{
|
| +    const UChar first = m_input->currentInputChar();
|
| +    const UChar second = m_input->peek(1);
|
| +    if (isASCIIDigit(first))
|
| +        return true;
|
| +
|
| +    switch (first) {
|
| +    case '+':
|
| +    case '-':
|
| +        if (isASCIIDigit(second))
|
| +            return true;
|
| +        return second == '.' && isASCIIDigit(m_input->peek(2));
|
| +    case '.':
|
| +        return isASCIIDigit(second);
|
| +    default:
|
| +        return false;
|
| +    }
|
| +}
|
| +
|
| +// Returns true when the upcoming input would start an identifier: the
|
| +// current character is a name-start character, nextTwoCharsAreValidEscape()
|
| +// holds, or the current character is '-' and is followed by a name-start
|
| +// character or a valid escape. Nothing is consumed.
|
| +// http://www.w3.org/TR/css3-syntax/#would-start-an-identifier
|
| +bool MediaQueryTokenizer::nextCharsAreIdentifier()
|
| +{
|
| +    const UChar firstChar = m_input->currentInputChar();
|
| +    if (isNameStart(firstChar))
|
| +        return true;
|
| +    if (nextTwoCharsAreValidEscape())
|
| +        return true;
|
| +    if (firstChar != '-')
|
| +        return false;
|
| +
|
| +    const UChar secondChar = m_input->peek(1);
|
| +    return isNameStart(secondChar) || twoCharsAreValidEscape(secondChar, m_input->peek(2));
|
| +}
|
| +
|
| +} // namespace WebCore
|
|
|