Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(590)

Unified Diff: Source/core/css/parser/MediaQueryTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Removed comment Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: Source/core/css/parser/MediaQueryTokenizer.cpp
diff --git a/Source/core/css/parser/MediaQueryTokenizer.cpp b/Source/core/css/parser/MediaQueryTokenizer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b3f4a647026b83cd74171fadd553523b05f2029c
--- /dev/null
+++ b/Source/core/css/parser/MediaQueryTokenizer.cpp
@@ -0,0 +1,370 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "config.h"
+#include "core/css/parser/MediaQueryTokenizer.h"
+
+#include "core/css/parser/MediaQueryInputStream.h"
+#include "core/html/parser/HTMLParserIdioms.h"
+#include "wtf/unicode/CharacterNames.h"
+
+namespace WebCore {
+
+// http://dev.w3.org/csswg/css-syntax/#name-start-code-point
+// Returns true when |c| may begin a CSS name: an ASCII letter, an
+// underscore, or any non-ASCII code unit.
+static bool isNameStart(UChar c)
+{
+    return isASCIIAlpha(c) || c == '_' || !isASCII(c);
+}
+
+// http://www.w3.org/TR/css-syntax-3/#name-code-point
+// Returns true when |c| may appear inside a CSS name: a hyphen, an ASCII
+// digit, or anything accepted by isNameStart().
+static bool isNameChar(UChar c)
+{
+    if (c == '-' || isASCIIDigit(c))
+        return true;
+    return isNameStart(c);
+}
+
+// http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escape
+// Returns true when |first| and |second| start a valid escape: a reverse
+// solidus that is followed by neither a newline nor the end of input.
+static bool twoCharsAreValidEscape(UChar first, UChar second)
+{
+    if (first != '\\')
+        return false;
+    // The input is not preprocessed, so '\r' and '\f' have not been folded
+    // into '\n' and must be rejected as newlines here as well.
+    if (second == '\n' || second == '\r' || second == '\f')
+        return false;
+    return second != kEndOfFileMarker;
+}
+
+// Creates a tokenizer that is not yet attached to any input stream;
+// nextToken() binds m_input on every call.
+MediaQueryTokenizer::MediaQueryTokenizer()
+    : m_input(0) // Never leave the stream pointer indeterminate.
+{
+}
+
+// Hands |c| back to the input stream through pushBack().
+void MediaQueryTokenizer::reconsume(UChar c)
+{
+ m_input->pushBack(c);
+}
+
+// Returns the current input char and moves the stream past it.
+UChar MediaQueryTokenizer::consume()
+{
+    const UChar consumed = m_input->currentInputChar();
+    m_input->advance();
+    return consumed;
+}
+
+// Advances the input stream by |offset| without returning what was skipped.
+void MediaQueryTokenizer::consume(unsigned offset)
+{
+ m_input->advance(offset);
+}
+
+// Collapses the run of white space that follows into a single
+// WhitespaceToken. |cc| itself is not inspected.
+MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc)
+{
+ // CSS Tokenization is currently lossy, but we could record
+ // the exact whitespace instead of discarding it here.
+ consumeUntilNotWhitespace();
+ return MediaQueryToken(WhitespaceToken);
+}
+
+// Produces a LeftParenToken; |cc| is not inspected.
+// Presumably registered for '(' in the code point table (not visible here).
+MediaQueryToken MediaQueryTokenizer::leftParen(UChar cc)
+{
+ return MediaQueryToken(LeftParenToken);
+}
+
+// Produces a RightParenToken; |cc| is not inspected.
+// Presumably registered for ')' in the code point table (not visible here).
+MediaQueryToken MediaQueryTokenizer::rightParen(UChar cc)
+{
+ return MediaQueryToken(RightParenToken);
+}
+
+// Handles a '+' or '.' that nextToken() has already consumed as |cc|:
+// yields a numeric token when |cc| starts a number, otherwise a DelimToken
+// carrying |cc|.
+MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc)
+{
+    // nextCharsAreNumber() expects the sign / full stop to be the current
+    // input char, so |cc| has to be put back before looking ahead. Checking
+    // after it was consumed inspects the wrong window and accepts input
+    // such as "++5" or ".+5" as a number.
+    reconsume(cc);
+    if (nextCharsAreNumber())
+        return consumeNumericToken();
+    // Not a number: take |cc| out of the stream again.
+    consume();
+    return MediaQueryToken(DelimToken, cc);
kenneth.r.christiansen 2014/03/08 22:37:47 DelimiterToken why not write it out, it is quite s
+}
+
+// Produces a CommaToken; |cc| is not inspected.
+MediaQueryToken MediaQueryTokenizer::comma(UChar cc)
+{
+ return MediaQueryToken(CommaToken);
+}
+
+// Handles a '-' that nextToken() has already consumed as |cc|: yields a
+// numeric token, an ident-like token, or a DelimToken carrying |cc|.
+MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc)
+{
+    // Both look-ahead helpers treat the '-' as the current input char
+    // (nextCharsAreIdentifier() has an explicit firstChar == '-' branch),
+    // so |cc| has to be put back before they run. Looking ahead after it
+    // was consumed accepts input such as "--5" as a number.
+    reconsume(cc);
+    if (nextCharsAreNumber())
+        return consumeNumericToken();
+    if (nextCharsAreIdentifier())
+        return consumeIdentLikeToken();
+    // Neither a number nor a name: take |cc| out of the stream again.
+    consume();
+    return MediaQueryToken(DelimToken, cc);
+}
+
+// Produces a DelimToken carrying |cc|.
+MediaQueryToken MediaQueryTokenizer::solidus(UChar cc)
+{
+ return MediaQueryToken(DelimToken, cc);
+}
+
+// Produces a ColonToken; |cc| is not inspected.
+MediaQueryToken MediaQueryTokenizer::colon(UChar cc)
+{
+ return MediaQueryToken(ColonToken);
+}
+
+// Produces a SemicolonToken; |cc| is not inspected.
+MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc)
+{
+ return MediaQueryToken(SemicolonToken);
+}
+
+// Handles an already consumed '\\' (|cc|). When it forms a valid escape with
+// the current input char, |cc| is put back and an ident-like token is
+// consumed; otherwise a DelimToken carrying |cc| is returned.
+MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc)
+{
+    const bool startsEscape = twoCharsAreValidEscape(cc, m_input->currentInputChar());
+    if (!startsEscape)
+        return MediaQueryToken(DelimToken, cc);
+
+    reconsume(cc);
+    return consumeIdentLikeToken();
+}
+
+// Puts |cc| back into the stream and consumes a numeric token from there.
+MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc)
+{
+ reconsume(cc);
+ return consumeNumericToken();
+}
+
+// Puts |cc| back into the stream and consumes an ident-like token from
+// there. nextToken() also uses this handler for every non-ASCII char.
+MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc)
+{
+ reconsume(cc);
+ return consumeIdentLikeToken();
+}
+
+// Produces the EOFToken that terminates tokenize(); |cc| is not inspected.
+MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc)
+{
+ return MediaQueryToken(EOFToken);
+}
+
+// Tokenizes |string| in full, appending every token to |outTokens|. The
+// terminating EOFToken is appended as well.
+void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTokens)
+{
+    // The spec asks for an input preprocessing pass at this point:
+    // http://www.w3.org/TR/css-syntax-3/#input-preprocessing
+    //
+    // It is deliberately skipped, because:
+    // * HTML spaces are used, and those accept \r and \f as white space
+    // * White spaces are not counted
+    // * consumeEscape replaces NULLs with replacement characters
+    MediaQueryTokenizer tokenizer;
+    MediaQueryInputStream input(string);
+
+    // At least one token (EOF) is always produced, so test after appending.
+    do {
+        outTokens.append(tokenizer.nextToken(input));
+    } while (outTokens.last().type() != EOFToken);
+}
+
+// Binds the tokenizer to |input|, consumes one char and dispatches on it to
+// produce the next token.
+MediaQueryToken MediaQueryTokenizer::nextToken(MediaQueryInputStream& input)
+{
+    // The CSS Syntax spec, unlike the HTMLTokenizer, describes a stateless
+    // tokenizer with a fixed-size look-ahead. A stateful model with one
+    // state per "next 3 codepoints are X" case would be possible, and
+    // state-machine tokenizers make incremental tokenization of partial
+    // sources easier to write. For now the spec is followed exactly.
+    m_input = &input;
+    const UChar cc = consume();
+
+    // Every non-ASCII char is a name-start char.
+    if (!isASCII(cc))
+        return nameStart(cc);
+
+    // ASCII chars are looked up in the per-code-point handler table.
+    ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM);
+    CodePoint handler = getCodePoints()->codePoints[cc];
+    if (!handler)
+        return MediaQueryToken(DelimToken, cc); // No handler: plain delimiter.
+
+    return (this->*handler)(cc);
+}
+
+// This method merges the following spec sections for efficiency
+// http://www.w3.org/TR/css3-syntax/#consume-a-number
+// http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number
+// Consumes [sign] digits* ['.' digits+] [('e'|'E') [sign] digits+] starting
+// at the current input char and returns it as a NumberToken.
+//
+// The whole number is first measured with peek() offsets and only consumed
+// at the end. The token is an IntegerValueType unless a fraction or an
+// exponent is present, in which case it is a NumberValueType.
+//
+// Callers must make sure nextCharsAreNumber() holds.
+MediaQueryToken MediaQueryTokenizer::consumeNumber()
+{
+    ASSERT(nextCharsAreNumber());
+    NumericValueType type = IntegerValueType;
+    unsigned peekOffset = 0;
+
+    // Optional sign.
+    int sign = 1;
+    if (m_input->currentInputChar() == '+') {
+        ++peekOffset;
+    } else if (m_input->peek(peekOffset) == '-') {
+        sign = -1;
+        ++peekOffset;
+    }
+
+    // Integer part (may be empty, e.g. ".5").
+    unsigned intStartPos = peekOffset;
+    peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
+    unsigned intEndPos = peekOffset;
+
+    // Fraction. The '.' only belongs to the number when a digit follows it,
+    // so look one char further WITHOUT moving peekOffset; otherwise input
+    // such as "5.em" would silently swallow the full stop.
+    unsigned fractionStartPos = 0;
+    unsigned fractionEndPos = 0;
+    if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(peekOffset + 1))) {
+        type = NumberValueType;
+        fractionStartPos = peekOffset; // The range includes the leading '.'.
+        peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset + 1);
+        fractionEndPos = peekOffset;
+    }
+
+    // Exponent. It is committed only when at least one digit follows the
+    // optional sign; otherwise the 'e' is left for the unit name.
+    int exponentSign = 1;
+    unsigned exponentStartPos = 0;
+    unsigned exponentEndPos = 0;
+    if (m_input->peek(peekOffset) == 'E' || m_input->peek(peekOffset) == 'e') {
+        unsigned exponentOffset = peekOffset + 1;
+        int candidateSign = 1;
+        if (m_input->peek(exponentOffset) == '+') {
+            ++exponentOffset;
+        } else if (m_input->peek(exponentOffset) == '-') {
+            candidateSign = -1;
+            ++exponentOffset;
+        }
+        unsigned exponentDigitsEnd = m_input->skipWhilePredicate<isASCIIDigit>(exponentOffset);
+        if (exponentDigitsEnd > exponentOffset) {
+            // Per the spec a number with an exponent is of type "number".
+            type = NumberValueType;
+            exponentSign = candidateSign;
+            exponentStartPos = exponentOffset;
+            exponentEndPos = exponentDigitsEnd;
+            peekOffset = exponentDigitsEnd;
+        }
+    }
+
+    // Convert the measured ranges. Absent parts are passed as the empty
+    // range [0, 0), exactly as before.
+    unsigned long long integerPart = m_input->getUInt(intStartPos, intEndPos);
+    double fractionPart = m_input->getDouble(fractionStartPos, fractionEndPos);
+    unsigned long long exponentPart = m_input->getUInt(exponentStartPos, exponentEndPos);
+    double exponent = pow(10, static_cast<double>(exponentSign) * static_cast<double>(exponentPart));
+    double value = static_cast<double>(sign) * (static_cast<double>(integerPart) + fractionPart) * exponent;
+
+    // Nothing matched, i.e. a caller broke the precondition asserted above.
+    // In builds without ASSERT still consume one char, so that tokenize()
+    // cannot loop forever on a token that makes no progress.
+    if (!peekOffset)
+        peekOffset = 1;
+
+    m_input->advance(peekOffset);
+    return MediaQueryToken(NumberToken, value, type);
+}
+
+// http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token
+// Consumes a number and, when a unit name or a '%' follows it directly,
+// turns the token into a dimension or a percentage respectively.
+MediaQueryToken MediaQueryTokenizer::consumeNumericToken()
+{
+    MediaQueryToken number = consumeNumber();
+
+    // A unit name takes precedence over a percent sign.
+    if (nextCharsAreIdentifier()) {
+        number.convertToDimensionWithUnit(consumeName());
+        return number;
+    }
+
+    if (consumeIfNext('%'))
+        number.convertToPercentage();
+    return number;
+}
+
+// http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token
+// Consumes a name. A '(' directly after it is consumed too and makes the
+// result a FunctionToken; otherwise the result is an IdentToken.
+MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken()
+{
+    String name = consumeName();
+    const bool isFunction = consumeIfNext('(');
+    return MediaQueryToken(isFunction ? FunctionToken : IdentToken, name);
+}
+
+// Consumes input for as long as the current char is an HTML space.
+void MediaQueryTokenizer::consumeUntilNotWhitespace()
+{
+    // HTML space (not CSS space) is tested, because the input has not gone
+    // through the preprocessing step.
+    for (;;) {
+        if (!isHTMLSpace<UChar>(m_input->currentInputChar()))
+            return;
+        consume();
+    }
+}
+
+// Consumes the current input char only if it equals |character|.
+// Returns whether anything was consumed.
+bool MediaQueryTokenizer::consumeIfNext(UChar character)
+{
+    if (m_input->currentInputChar() != character)
+        return false;
+
+    consume();
+    return true;
+}
+
+// http://www.w3.org/TR/css3-syntax/#consume-a-name
+// Consumes name chars and escapes for as long as they keep coming and
+// returns them as a String (empty when the input does not start a name).
+String MediaQueryTokenizer::consumeName()
+{
+    // FIXME: Is this as efficient as it can be?
+    // The possibility of escape chars mandates a copy AFAICT.
+    Vector<UChar> name;
+    for (;;) {
+        if (isNameChar(m_input->currentInputChar())) {
+            name.append(consume());
+        } else if (nextTwoCharsAreValidEscape()) {
+            // This consume() fixes a spec bug: the first code point has to
+            // be consumed before the escaped code point is.
+            consume();
+            name.append(consumeEscape());
+        } else {
+            break;
+        }
+    }
+    return String(name);
+}
+
+// http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point
+// Consumes the part of an escape that follows the reverse solidus and
+// returns the char it stands for.
+//
+// A hex escape is 1 to 6 hex digits optionally followed by one white space,
+// which is consumed as part of the escape. Any other char stands for itself.
+// The replacement character is returned for values that are not valid here.
+UChar MediaQueryTokenizer::consumeEscape()
+{
+    UChar cc = consume();
+    ASSERT(cc != '\n');
+    if (isASCIIHexDigit(cc)) {
+        // Only consume a char after checking that it is a hex digit, so that
+        // the char which ends the run stays in the stream, and so that all
+        // of the (up to) 6 digits take part in the value.
+        unsigned consumedHexDigits = 1;
+        String hexChars;
+        hexChars.append(cc);
+        while (consumedHexDigits < 6 && isASCIIHexDigit(m_input->currentInputChar())) {
+            hexChars.append(consume());
+            ++consumedHexDigits;
+        }
+
+        // One white space after the digits belongs to the escape. The input
+        // is not preprocessed, so "\r\n" has to be treated as one newline.
+        UChar next = m_input->currentInputChar();
+        if (next == '\r' && m_input->peek(1) == '\n')
+            consume(2);
+        else if (isHTMLSpace<UChar>(next))
+            consume();
+
+        bool ok = false;
+        unsigned codePoint = hexChars.toUIntStrict(&ok, 16);
+        if (!ok)
+            return WTF::Unicode::replacementCharacter;
+        // Zero and surrogates are replaced, as the spec requires.
+        if (!codePoint || (codePoint >= 0xD800 && codePoint <= 0xDFFF))
+            return WTF::Unicode::replacementCharacter;
+        // FIXME: A UChar cannot hold code points above U+FFFF. Return the
+        // replacement character instead of a silently truncated value.
+        if (codePoint > 0xFFFF)
+            return WTF::Unicode::replacementCharacter;
+        return static_cast<UChar>(codePoint);
+    }
+
+    // Replaces NULLs with replacement characters, since we do not perform preprocessing
+    if (cc == kEndOfFileMarker)
+        return WTF::Unicode::replacementCharacter;
+    return cc;
+}
+
+// Returns true when the current input char and the one after it start a
+// valid escape.
+bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()
+{
+    // peek(0) is the current input char (consumeNumber() and
+    // nextCharsAreNumber() rely on that), so the pair to test starts at the
+    // current char. Starting at peek(1) would skip the reverse solidus that
+    // consumeName() is about to consume.
+    return twoCharsAreValidEscape(m_input->currentInputChar(), m_input->peek(1));
+}
+
+// http://www.w3.org/TR/css3-syntax/#starts-with-a-number
+// Returns true when the input, starting at the current char, begins a
+// number: a digit, a '.' followed by a digit, or a sign followed by either
+// of those. Nothing is consumed.
+bool MediaQueryTokenizer::nextCharsAreNumber()
+{
+    const UChar lead = m_input->currentInputChar();
+    const UChar following = m_input->peek(1);
+
+    if (isASCIIDigit(lead))
+        return true;
+
+    if (lead == '.')
+        return isASCIIDigit(following);
+
+    if (lead != '+' && lead != '-')
+        return false;
+
+    // Signed: either "<sign><digit>" or "<sign>.<digit>".
+    if (isASCIIDigit(following))
+        return true;
+    return following == '.' && isASCIIDigit(m_input->peek(2));
+}
+
+// http://www.w3.org/TR/css3-syntax/#would-start-an-identifier
+// Returns true when the input, starting at the current char, would begin an
+// identifier. Nothing is consumed.
+bool MediaQueryTokenizer::nextCharsAreIdentifier()
+{
+    const UChar lead = m_input->currentInputChar();
+    if (isNameStart(lead))
+        return true;
+    if (nextTwoCharsAreValidEscape())
+        return true;
+
+    // Anything else can only start an identifier through a leading '-'.
+    if (lead != '-')
+        return false;
+
+    // "-" followed by a name-start char or by an escape.
+    const UChar following = m_input->peek(1);
+    return isNameStart(following) || twoCharsAreValidEscape(following, m_input->peek(2));
+}
+
+} // namespace WebCore

Powered by Google App Engine
This is Rietveld 408576698