Chromium Code Reviews| Index: Source/core/css/parser/MediaQueryTokenizer.cpp |
| diff --git a/Source/core/css/parser/MediaQueryTokenizer.cpp b/Source/core/css/parser/MediaQueryTokenizer.cpp |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..b3f4a647026b83cd74171fadd553523b05f2029c |
| --- /dev/null |
| +++ b/Source/core/css/parser/MediaQueryTokenizer.cpp |
| @@ -0,0 +1,370 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "config.h" |
| +#include "core/css/parser/MediaQueryTokenizer.h" |
| + |
| +#include "core/css/parser/MediaQueryInputStream.h" |
| +#include "core/html/parser/HTMLParserIdioms.h" |
| +#include "wtf/unicode/CharacterNames.h" |
| + |
| +namespace WebCore { |
| + |
| +// Name-start code point test, see |
| +// http://dev.w3.org/csswg/css-syntax/#name-start-code-point |
| +// True for ASCII letters, '_' and every non-ASCII code unit. |
| +static bool isNameStart(UChar c) |
| +{ |
| +    return isASCIIAlpha(c) || c == '_' || !isASCII(c); |
| +} |
| + |
| +// Name code point test, see |
| +// http://www.w3.org/TR/css-syntax-3/#name-code-point |
| +// A name code point is a name-start code point, an ASCII digit or '-'. |
| +static bool isNameChar(UChar c) |
| +{ |
| +    if (isASCIIDigit(c) || c == '-') |
| +        return true; |
| +    return isNameStart(c); |
| +} |
| + |
| +// Valid-escape test for a pair of code points, see |
| +// http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escape |
| +// The pair is an escape when it starts with a backslash that is followed by |
| +// neither a newline nor the end-of-file marker. |
| +static bool twoCharsAreValidEscape(UChar first, UChar second) |
| +{ |
| +    if (first != '\\') |
| +        return false; |
| +    return second != '\n' && second != kEndOfFileMarker; |
| +} |
| + |
| +// Creates a tokenizer that is not yet bound to any input stream. |
| +// nextToken() binds m_input on every call; it is cleared here so that it |
| +// never holds an indeterminate pointer value before that first call. |
| +MediaQueryTokenizer::MediaQueryTokenizer() |
| +    : m_input(0) |
| +{ |
| +} |
| + |
| +// Hands the character c back to the input stream via pushBack(). |
| +void MediaQueryTokenizer::reconsume(UChar c) |
| +{ |
| +    m_input->pushBack(c); |
| +} |
| + |
| +// Returns the stream's current input character and advances the stream |
| +// past it. |
| +UChar MediaQueryTokenizer::consume() |
| +{ |
| +    const UChar consumed = m_input->currentInputChar(); |
| +    m_input->advance(); |
| +    return consumed; |
| +} |
| + |
| +// Advances the input stream by offset, discarding what is skipped. |
| +void MediaQueryTokenizer::consume(unsigned offset) |
| +{ |
| +    m_input->advance(offset); |
| +} |
| + |
| +// Handler for a whitespace character: swallows the rest of the whitespace |
| +// run and emits one WhitespaceToken for all of it. |
| +MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc) |
| +{ |
| +    // The exact whitespace is dropped here, which makes tokenization lossy; |
| +    // it could be recorded on the token instead if that is ever needed. |
| +    consumeUntilNotWhitespace(); |
| +    return MediaQueryToken(WhitespaceToken); |
| +} |
| + |
| +// Handler for '(': emits a LeftParenToken. cc is not used. |
| +MediaQueryToken MediaQueryTokenizer::leftParen(UChar cc) |
| +{ |
| +    return MediaQueryToken(LeftParenToken); |
| +} |
| + |
| +// Handler for ')': emits a RightParenToken. cc is not used. |
| +MediaQueryToken MediaQueryTokenizer::rightParen(UChar cc) |
| +{ |
| +    return MediaQueryToken(RightParenToken); |
| +} |
| + |
| +// Handler for '+' and '.', which nextToken() has already consumed as cc. |
| +// Emits a numeric token when cc starts a number, otherwise a DelimToken |
| +// carrying cc. |
| +MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc) |
| +{ |
| +    // nextCharsAreNumber() inspects the stream starting at the current input |
| +    // character, so cc has to be put back first. Checking without it would |
| +    // test the characters after cc and accept input such as "+-5" or "..5". |
| +    reconsume(cc); |
| +    if (nextCharsAreNumber()) |
| +        return consumeNumericToken(); |
| +    // Not a number: take cc out of the stream again and emit it as a delimiter. |
| +    consume(); |
| + return MediaQueryToken(DelimToken, cc); |
|
kenneth.r.christiansen
2014/03/08 22:37:47
DelimiterToken why not write it out, it is quite s
|
| +} |
| + |
| +// Handler for ',': emits a CommaToken. cc is not used. |
| +MediaQueryToken MediaQueryTokenizer::comma(UChar cc) |
| +{ |
| +    return MediaQueryToken(CommaToken); |
| +} |
| + |
| +// Handler for '-', which nextToken() has already consumed as cc. |
| +// Emits a numeric token, an ident-like token, or a DelimToken carrying cc. |
| +MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc) |
| +{ |
| +    // nextCharsAreNumber() inspects the stream starting at the current input |
| +    // character, so the sign has to be put back before asking. Checking |
| +    // without it would accept input such as "--5" or "-+5" as a number. |
| +    reconsume(cc); |
| +    if (nextCharsAreNumber()) |
| +        return consumeNumericToken(); |
| + |
| +    // The identifier look-ahead is evaluated with cc consumed again. |
| +    consume(); |
| +    if (nextCharsAreIdentifier()) { |
| +        reconsume(cc); |
| +        return consumeIdentLikeToken(); |
| +    } |
| +    return MediaQueryToken(DelimToken, cc); |
| +} |
| + |
| +// Handler for '/': emits a DelimToken carrying cc. |
| +MediaQueryToken MediaQueryTokenizer::solidus(UChar cc) |
| +{ |
| +    return MediaQueryToken(DelimToken, cc); |
| +} |
| + |
| +// Handler for ':': emits a ColonToken. cc is not used. |
| +MediaQueryToken MediaQueryTokenizer::colon(UChar cc) |
| +{ |
| +    return MediaQueryToken(ColonToken); |
| +} |
| + |
| +// Handler for ';': emits a SemicolonToken. cc is not used. |
| +MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc) |
| +{ |
| +    return MediaQueryToken(SemicolonToken); |
| +} |
| + |
| +// Handler for '\\' (already consumed as cc). If cc and the current input |
| +// character form a valid escape, cc is put back and an ident-like token is |
| +// consumed; otherwise a DelimToken carrying cc is emitted. |
| +MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc) |
| +{ |
| +    if (!twoCharsAreValidEscape(cc, m_input->currentInputChar())) |
| +        return MediaQueryToken(DelimToken, cc); |
| + |
| +    reconsume(cc); |
| +    return consumeIdentLikeToken(); |
| +} |
| + |
| +// Handler for an ASCII digit: puts the digit back into the stream and |
| +// consumes a complete numeric token starting at it. |
| +MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc) |
| +{ |
| +    reconsume(cc); |
| +    return consumeNumericToken(); |
| +} |
| + |
| +// Handler for a name-start character: puts it back into the stream and |
| +// consumes a complete ident-like token starting at it. |
| +MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc) |
| +{ |
| +    reconsume(cc); |
| +    return consumeIdentLikeToken(); |
| +} |
| + |
| +// Handler for the end of input: emits an EOFToken. cc is not used. |
| +MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc) |
| +{ |
| +    return MediaQueryToken(EOFToken); |
| +} |
| + |
| +// Tokenizes string and appends every token to outTokens. The last token |
| +// appended is always the EOFToken. |
| +void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTokens) |
| +{ |
| +    MediaQueryTokenizer tokenizer; |
| +    // The spec asks for an input preprocessing step at this point, see |
| +    // http://www.w3.org/TR/css-syntax-3/#input-preprocessing |
| +    // |
| +    // That step is skipped because: |
| +    // * whitespace is matched with the HTML definition, which already |
| +    //   treats \r and \f as whitespace; |
| +    // * whitespace is not counted; |
| +    // * consumeEscape substitutes the replacement character for NULLs. |
| + |
| +    MediaQueryInputStream input(string); |
| +    do { |
| +        outTokens.append(tokenizer.nextToken(input)); |
| +    } while (outTokens.last().type() != EOFToken); |
| +} |
| + |
| +// Consumes one character from input and dispatches on it to produce the |
| +// next token. The tokenizer stays bound to input (through m_input) for the |
| +// helpers that run during this call. |
| +MediaQueryToken MediaQueryTokenizer::nextToken(MediaQueryInputStream& input) |
| +{ |
| +    // The CSS Syntax spec describes a stateless tokenizer with a fixed-size |
| +    // look-ahead, unlike the HTMLTokenizer. A stateful design with one state |
| +    // per "next 3 codepoints are X" case would make incremental tokenization |
| +    // of partial sources easier, but for now the spec is followed exactly. |
| +    m_input = &input; |
| +    const UChar cc = consume(); |
| + |
| +    // Every non-ASCII character starts a name. |
| +    if (!isASCII(cc)) |
| +        return nameStart(cc); |
| + |
| +    // ASCII characters are dispatched through the per-code-point handler table. |
| +    ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM); |
| +    CodePoint handler = getCodePoints()->codePoints[cc]; |
| +    if (!handler) |
| +        return MediaQueryToken(DelimToken, cc); |
| + |
| +    return (this->*handler)(cc); |
| +} |
| + |
| +// Consumes a number from the input stream and returns it as a NumberToken. |
| +// This method merges the following spec sections for efficiency: |
| +// http://www.w3.org/TR/css3-syntax/#consume-a-number |
| +// http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number |
| +// |
| +// The number is first measured with look-ahead only (sign, integer digits, |
| +// optional fraction, optional exponent); the stream is advanced once at the |
| +// end, past exactly the characters that belong to the number. |
| +MediaQueryToken MediaQueryTokenizer::consumeNumber() |
| +{ |
| +    ASSERT(nextCharsAreNumber()); |
| +    NumericValueType type = IntegerValueType; |
| +    int sign = 1; |
| +    unsigned peekOffset = 0; |
| + |
| +    // Optional sign. |
| +    if (m_input->currentInputChar() == '+') { |
| +        ++peekOffset; |
| +    } else if (m_input->peek(peekOffset) == '-') { |
| +        sign = -1; |
| +        ++peekOffset; |
| +    } |
| + |
| +    // Integer digits (possibly none, e.g. ".5"). |
| +    unsigned intStartPos = peekOffset; |
| +    peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); |
| +    unsigned intEndPos = peekOffset; |
| + |
| +    // Fraction: a '.' only belongs to the number when a digit follows it. |
| +    // The look-ahead must not move peekOffset unless both tests succeed, |
| +    // otherwise a trailing '.' (as in "5.") would be swallowed. |
| +    unsigned fractionStartPos = 0; |
| +    unsigned fractionEndPos = 0; |
| +    if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(peekOffset + 1))) { |
| +        // The fraction range starts at the '.' so getDouble() sees ".ddd". |
| +        fractionStartPos = peekOffset; |
| +        peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset + 1); |
| +        fractionEndPos = peekOffset; |
| +    } |
| + |
| +    // Exponent: 'e' or 'E', an optional sign, and at least one digit. |
| +    int exponentSign = 1; |
| +    unsigned exponentStartPos = 0; |
| +    unsigned exponentEndPos = 0; |
| +    bool hasExponent = false; |
| +    if (m_input->peek(peekOffset) == 'E' || m_input->peek(peekOffset) == 'e') { |
| +        unsigned peekOffsetBeforeExponent = peekOffset; |
| +        ++peekOffset; |
| +        if (m_input->peek(peekOffset) == '+') { |
| +            ++peekOffset; |
| +        } else if (m_input->peek(peekOffset) == '-') { |
| +            exponentSign = -1; |
| +            ++peekOffset; |
| +        } |
| +        exponentStartPos = peekOffset; |
| +        peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); |
| +        exponentEndPos = peekOffset; |
| +        if (exponentEndPos == exponentStartPos) { |
| +            // No exponent digits: the 'e' is not part of the number (it may |
| +            // start a unit such as "em"), so rewind the look-ahead. |
| +            peekOffset = peekOffsetBeforeExponent; |
| +        } else { |
| +            hasExponent = true; |
| +        } |
| +    } |
| + |
| +    // Convert the measured ranges to a value. |
| +    unsigned long long integerPart = m_input->getUInt(intStartPos, intEndPos); |
| +    double fractionPart = m_input->getDouble(fractionStartPos, fractionEndPos); |
| +    unsigned long long exponentPart = m_input->getUInt(exponentStartPos, exponentEndPos); |
| +    double exponent = pow(10, static_cast<double>(exponentSign) * static_cast<double>(exponentPart)); |
| +    double value = static_cast<double>(sign) * (static_cast<double>(integerPart) + fractionPart) * exponent; |
| + |
| +    m_input->advance(peekOffset); |
| + |
| +    // Per the spec a number is only an integer when it has neither a |
| +    // fraction nor an exponent. |
| +    if (fractionEndPos > fractionStartPos || hasExponent) |
| +        type = NumberValueType; |
| + |
| +    return MediaQueryToken(NumberToken, value, type); |
| +} |
| + |
| +// Consumes a number and, when a unit name or '%' follows it directly, turns |
| +// the token into a dimension or a percentage. See |
| +// http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token |
| +MediaQueryToken MediaQueryTokenizer::consumeNumericToken() |
| +{ |
| +    MediaQueryToken token = consumeNumber(); |
| +    if (nextCharsAreIdentifier()) { |
| +        token.convertToDimensionWithUnit(consumeName()); |
| +        return token; |
| +    } |
| +    if (consumeIfNext('%')) |
| +        token.convertToPercentage(); |
| +    return token; |
| +} |
| + |
| +// Consumes a name; a directly following '(' is consumed too and makes the |
| +// result a FunctionToken, otherwise it is an IdentToken. See |
| +// http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token |
| +MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken() |
| +{ |
| +    String name = consumeName(); |
| +    return consumeIfNext('(') |
| +        ? MediaQueryToken(FunctionToken, name) |
| +        : MediaQueryToken(IdentToken, name); |
| +} |
| + |
| +// Consumes input characters for as long as the current one is whitespace. |
| +void MediaQueryTokenizer::consumeUntilNotWhitespace() |
| +{ |
| +    // Whitespace is matched with the HTML definition rather than the CSS one |
| +    // because the input is not preprocessed. |
| +    for (;;) { |
| +        if (!isHTMLSpace<UChar>(m_input->currentInputChar())) |
| +            return; |
| +        consume(); |
| +    } |
| +} |
| + |
| +// Consumes the current input character only if it equals character. |
| +// Returns whether a character was consumed. |
| +bool MediaQueryTokenizer::consumeIfNext(UChar character) |
| +{ |
| +    if (m_input->currentInputChar() != character) |
| +        return false; |
| + |
| +    consume(); |
| +    return true; |
| +} |
| + |
| +// Consumes name characters and escapes for as long as they continue and |
| +// returns them as a string. See |
| +// http://www.w3.org/TR/css3-syntax/#consume-a-name |
| +String MediaQueryTokenizer::consumeName() |
| +{ |
| +    // FIXME: Is this as efficient as it can be? |
| +    // Escaped characters seem to make a copy unavoidable. |
| +    Vector<UChar> nameChars; |
| +    for (;;) { |
| +        if (isNameChar(m_input->currentInputChar())) { |
| +            nameChars.append(consume()); |
| +        } else if (nextTwoCharsAreValidEscape()) { |
| +            // This consume() works around a spec bug: the first code point |
| +            // has to be consumed before the escaped code point is. |
| +            consume(); |
| +            nameChars.append(consumeEscape()); |
| +        } else { |
| +            break; |
| +        } |
| +    } |
| +    return String(nameChars); |
| +} |
| + |
| +// Consumes an escaped code point and returns the character it stands for. |
| +// See http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point |
| +// |
| +// A hex escape is one to six hex digits optionally followed by a single |
| +// whitespace character; any other character is returned as itself. The |
| +// replacement character is returned for values that cannot be represented. |
| +UChar MediaQueryTokenizer::consumeEscape() |
| +{ |
| +    UChar cc = consume(); |
| +    ASSERT(cc != '\n'); |
| +    if (isASCIIHexDigit(cc)) { |
| +        unsigned consumedHexDigits = 1; |
| +        String hexChars; |
| +        hexChars.append(cc); |
| +        // Look at the next character before consuming it, so the character |
| +        // that ends the digit run stays in the stream. Up to six digits are |
| +        // accepted in total. |
| +        while (consumedHexDigits < 6 && isASCIIHexDigit(m_input->currentInputChar())) { |
| +            hexChars.append(consume()); |
| +            ++consumedHexDigits; |
| +        } |
| +        // A single whitespace character directly after the digits ends the |
| +        // escape and is consumed with it. |
| +        if (isHTMLSpace<UChar>(m_input->currentInputChar())) |
| +            consume(); |
| + |
| +        bool ok = false; |
| +        unsigned codePoint = hexChars.toUIntStrict(&ok, 16); |
| +        if (!ok) |
| +            return WTF::Unicode::replacementCharacter; |
| +        // Zero and surrogate values are replaced as the spec requires. Values |
| +        // above 0xFFFF do not fit the UChar return type and are replaced |
| +        // rather than silently truncated. |
| +        if (!codePoint || codePoint > 0xFFFF || (codePoint >= 0xD800 && codePoint <= 0xDFFF)) |
| +            return WTF::Unicode::replacementCharacter; |
| +        return static_cast<UChar>(codePoint); |
| +    } |
| + |
| +    // Replaces NULLs with replacement characters, since we do not perform preprocessing |
| +    if (cc == kEndOfFileMarker) |
| +        return WTF::Unicode::replacementCharacter; |
| +    return cc; |
| +} |
| + |
| +// Tests whether the characters at look-ahead offsets 1 and 2 form a valid |
| +// escape. |
| +// NOTE(review): nextCharsAreNumber() pairs currentInputChar() with peek(1) |
| +// as "first" and "second", which suggests offset 0 is the current input |
| +// character. If so, this tests the pair after the current character rather |
| +// than the pair starting at it - confirm against MediaQueryInputStream::peek. |
| +bool MediaQueryTokenizer::nextTwoCharsAreValidEscape() |
| +{ |
| +    const UChar first = m_input->peek(1); |
| +    const UChar second = m_input->peek(2); |
| +    return twoCharsAreValidEscape(first, second); |
| +} |
| + |
| +// Tests whether the stream, starting at the current input character, begins |
| +// a number: a digit, a sign followed by a digit or by ".digit", or a '.' |
| +// followed by a digit. See |
| +// http://www.w3.org/TR/css3-syntax/#starts-with-a-number |
| +bool MediaQueryTokenizer::nextCharsAreNumber() |
| +{ |
| +    const UChar first = m_input->currentInputChar(); |
| +    const UChar second = m_input->peek(1); |
| +    if (isASCIIDigit(first)) |
| +        return true; |
| + |
| +    switch (first) { |
| +    case '+': |
| +    case '-': |
| +        if (isASCIIDigit(second)) |
| +            return true; |
| +        return second == '.' && isASCIIDigit(m_input->peek(2)); |
| +    case '.': |
| +        return isASCIIDigit(second); |
| +    default: |
| +        return false; |
| +    } |
| +} |
| + |
| +// Tests whether the upcoming characters would start an identifier. See |
| +// http://www.w3.org/TR/css3-syntax/#would-start-an-identifier |
| +bool MediaQueryTokenizer::nextCharsAreIdentifier() |
| +{ |
| +    const UChar lead = m_input->currentInputChar(); |
| +    if (isNameStart(lead) || nextTwoCharsAreValidEscape()) |
| +        return true; |
| + |
| +    // Anything else can only start an identifier with a leading '-' that is |
| +    // followed by a name-start character or by a valid escape. |
| +    if (lead != '-') |
| +        return false; |
| + |
| +    return isNameStart(m_input->peek(1)) |
| +        || twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2)); |
| +} |
| + |
| +} // namespace WebCore |