Source/core/css/parser/NewCSSTokenizer.cpp - Issue 171383002: A thread-safe Media Query Parser

Unified Diff: Source/core/css/parser/NewCSSTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« Source/core/css/parser/MediaQueryParserTest.cpp ('K') | « Source/core/css/parser/NewCSSTokenizer.h ('k') | Source/core/css/parser/NewCSSTokenizerTest.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: Source/core/css/parser/NewCSSTokenizer.cpp

diff --git a/Source/core/css/parser/NewCSSTokenizer.cpp b/Source/core/css/parser/NewCSSTokenizer.cpp

new file mode 100644

index 0000000000000000000000000000000000000000..d053944c3b92d67a87ac6cadbed86cfca4920a64

--- /dev/null

+++ b/Source/core/css/parser/NewCSSTokenizer.cpp

@@ -0,0 +1,437 @@

+/*

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions are

+ * met:

+ *

+ * * Redistributions of source code must retain the above copyright

+ * notice, this list of conditions and the following disclaimer.

+ * * Redistributions in binary form must reproduce the above

+ * copyright notice, this list of conditions and the following disclaimer

+ * in the documentation and/or other materials provided with the

+ * distribution.

+ * * Neither the name of Google Inc. nor the names of its

+ * contributors may be used to endorse or promote products derived from

+ * this software without specific prior written permission.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ */

+#include "config.h"

+#include "core/css/parser/NewCSSTokenizer.h"

+#include "core/css/parser/CSSInputStream.h"

+#include "core/css/parser/CSSParserIdioms.h"

+#include "platform/text/SegmentedString.h"

+#include "wtf/TemporaryChange.h"

+#include "wtf/unicode/CharacterNames.h"

+namespace WebCore {

+// Returns true when |c| may begin a CSS name: an ASCII letter, an
+// underscore, or any non-ASCII code unit.
+// http://dev.w3.org/csswg/css-syntax/#name-start-code-point
+static bool isNameStart(UChar c)
+{
+    return isASCIIAlpha(c) || c == '_' || !isASCII(c);
+}

+// Returns true when |c| may appear inside a CSS name: anything accepted by
+// isNameStart(), plus ASCII digits and the hyphen.
+// http://www.w3.org/TR/css-syntax-3/#name-code-point
+static bool isNameChar(UChar c)
+{
+    if (c == '-' || isASCIIDigit(c))
+        return true;
+    return isNameStart(c);
+}

+// Returns true when |first| is a backslash and |second| is neither a newline
+// nor the end-of-file marker, i.e. the pair starts a valid escape.
+// http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escape
+static bool twoCharsAreValidEscape(UChar first, UChar second)
+{
+    if (first != '\\')
+        return false;
+    return second != '\n' && second != kEndOfFileMarker;
+}

+NewCSSTokenizer::NewCSSTokenizer()

+// Hands |c| back to the current input stream through pushBack(), presumably
+// so that the next read of the stream sees it again (confirm against
+// CSSInputStream). Callers in this file pass the character they just got
+// from consume().
+void NewCSSTokenizer::reconsume(UChar c)

+ m_input->pushBack(c);

+// Reads the character at the stream's current position, advances the stream,
+// and returns the character that was read.
+UChar NewCSSTokenizer::consume()
+{
+    const UChar consumed = m_input->currentInputChar();
+    m_input->advance();
+    return consumed;
+}

+// Tokenizes |string| from start to end, appending every token to
+// |outTokens|. The final token appended is always the EOFToken.
+void NewCSSTokenizer::tokenize(String string, Vector<CSSToken>& outTokens)
+{
+    CSSInputStream input(string);
+    NewCSSTokenizer tokenizer;
+    do {
+        outTokens.append(tokenizer.nextToken(input));
+    } while (outTokens.last().type() != EOFToken);
+}

+// Consumes one token from |input| and returns it. |input| becomes the
+// tokenizer's current stream (m_input) for the duration of the call and for
+// every helper it invokes. Returns an EOFToken once consume() yields
+// kEndOfFileMarker; any character without a dedicated rule is returned as a
+// DelimToken carrying that character.
+CSSToken NewCSSTokenizer::nextToken(CSSInputStream& input)
+{
+    // Unlike the HTMLTokenizer, the CSS Syntax spec is written
+    // as a stateless, (fixed-size) look-ahead tokenizer.
+    // We could move to the stateful model and instead create
+    // states for all the "next 3 codepoints are X" cases.
+    // State-machine tokenizers are easier to write to handle
+    // incremental tokenization of partial sources.
+    // However, for now we follow the spec exactly.
+    m_input = &input;
+    UChar cc = consume();
+
+    if (isCSSSpace(cc)) {
+        // CSS Tokenization is currently lossy, but we could record
+        // the exact whitespace instead of discarding it here.
+        consumeUntilNotWhitespace();
+        return CSSToken(WhitespaceToken);
+    }
+    if (cc == '\"' || cc == '\'')
+        return consumeStringTokenUntil(cc);
+    if (cc == '#') {
+        if (nextCharIsNameChar() || nextTwoCharsAreValidEscape()) {
+            HashTokenType hashType = UnrestrictedHashToken;
+            if (nextCharsAreIdentifier())
+                hashType = IdHashToken;
+            return CSSToken(HashToken, consumeName(), hashType);
+        }
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == '$') {
+        if (consumeIfNext('='))
+            return CSSToken(SuffixMatchToken);
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == '(')
+        return CSSToken(LeftParenToken);
+    if (cc == ')')
+        return CSSToken(RightParenToken);
+    if (cc == '*') {
+        if (consumeIfNext('='))
+            return CSSToken(SubstringMatchToken);
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == '+' || cc == '.') {
+        // The "starts with a number" test has to include |cc| itself, so put
+        // it back before looking ahead. consumeNumber() asserts the same
+        // condition on the reconsumed stream; checking only the characters
+        // after |cc| would let inputs such as "+-5" or "..5" through.
+        reconsume(cc);
+        if (nextCharsAreNumber())
+            return consumeNumericToken();
+        consume();
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == ',')
+        return CSSToken(CommaToken);
+    if (cc == '-') {
+        // As above: both look-ahead checks must see the '-' as their first
+        // character, so they run on the reconsumed stream.
+        reconsume(cc);
+        if (nextCharsAreNumber())
+            return consumeNumericToken();
+        if (nextCharsAreIdentifier())
+            return consumeIdentLikeToken();
+        consume();
+        if (consumeIfNext("->"))
+            return CSSToken(CDCToken);
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == '/') {
+        if (consumeIfNext('*')) {
+            // Comments produce no token: skip them and tokenize what follows.
+            consumeThroughCommentEndOrUntilEOF();
+            return nextToken(*m_input);
+        }
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == ':')
+        return CSSToken(ColonToken);
+    if (cc == ';')
+        return CSSToken(SemicolonToken);
+    if (cc == '<') {
+        if (consumeIfNext("!--"))
+            return CSSToken(CDOToken);
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == '@') {
+        if (nextCharsAreIdentifier())
+            return CSSToken(AtKeywordToken, consumeName());
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == '[')
+        return CSSToken(LeftBracketToken);
+    if (cc == '\\') {
+        if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) {
+            reconsume(cc);
+            return consumeIdentLikeToken();
+        }
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == ']')
+        return CSSToken(RightBracketToken);
+    if (cc == '^') {
+        if (consumeIfNext('='))
+            return CSSToken(PrefixMatchToken);
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == '{')
+        return CSSToken(LeftBraceToken);
+    // This branch used to test '{' a second time, which made it unreachable
+    // and turned every closing brace into a DelimToken.
+    if (cc == '}')
+        return CSSToken(RightBraceToken);
+    if (isASCIIDigit(cc)) {
+        // "reconsume" here is not according to spec, but required AFAICT.
+        // https://www.w3.org/Bugs/Public/show_bug.cgi?id=24661
+        reconsume(cc);
+        return consumeNumericToken();
+    }
+    // if (cc == 'U' || cc == 'u') {
+    // // U+0055 LATIN CAPITAL LETTER U (U)
+    // // U+0075 LATIN SMALL LETTER U (u)
+    // // If the next 2 input code points are U+002B PLUS SIGN (+) followed by a hex digit or U+003F QUESTION MARK (?), consume the next input code point. Note: don’t consume both of them. Consume a unicode-range token and return it.
+    // // Otherwise, reconsume the current input code point, consume an ident-like token, and return it.
+    // reconsume(cc);
+    // return consumeIdentLikeToken();
+    // }
+    if (isNameStart(cc)) {
+        reconsume(cc);
+        return consumeIdentLikeToken();
+    }
+    if (cc == '|') {
+        if (consumeIfNext('='))
+            return CSSToken(DashMatchToken);
+        if (consumeIfNext('|'))
+            return CSSToken(ColumnToken);
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == '~') {
+        if (consumeIfNext('='))
+            return CSSToken(IncludeMatchToken);
+        return CSSToken(DelimToken, cc);
+    }
+    if (cc == kEndOfFileMarker)
+        return CSSToken(EOFToken);
+    return CSSToken(DelimToken, cc);
+}

+// This method merges the following spec sections for efficiency
+// http://www.w3.org/TR/css3-syntax/#consume-a-number
+// http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number
+//
+// Measures an optional sign, the integer digits, an optional ".digits"
+// fraction and an optional exponent purely by peeking, then advances the
+// stream past everything that was recognised and returns a NumberToken
+// holding the computed value. The stream must start with a number.
+CSSToken NewCSSTokenizer::consumeNumber()
+{
+    ASSERT(nextCharsAreNumber());
+    // FIXME - repr should get the value as a string, even though I'm not sure it's useful
+    String repr;
+    NumericValueType type = IntegerValueType;
+    int sign = 1;
+    unsigned peekOffset = 0;
+
+    if (m_input->currentInputChar() == '+') {
+        ++peekOffset;
+    } else if (m_input->peek(peekOffset) == '-') {
+        sign = -1;
+        ++peekOffset;
+    }
+
+    unsigned intStartPos = peekOffset;
+    peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
+    unsigned intEndPos = peekOffset;
+
+    // Only treat '.' as part of the number when a digit follows it. Look
+    // ahead without moving peekOffset so a lone '.' stays in the stream.
+    unsigned fractionStartPos = 0;
+    unsigned fractionEndPos = 0;
+    if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(peekOffset + 1))) {
+        fractionStartPos = peekOffset + 1;
+        peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(fractionStartPos);
+        fractionEndPos = peekOffset;
+    }
+
+    // Likewise, 'e'/'E' only opens an exponent when it is followed by a
+    // digit, or by a sign and then a digit. Otherwise it belongs to whatever
+    // comes after the number (e.g. the unit in "1em") and must be left alone.
+    int exponentSign = 1;
+    unsigned exponentStartPos = 0;
+    unsigned exponentEndPos = 0;
+    if (m_input->peek(peekOffset) == 'E' || m_input->peek(peekOffset) == 'e') {
+        unsigned exponentOffset = peekOffset + 1;
+        int candidateSign = 1;
+        if (m_input->peek(exponentOffset) == '+') {
+            ++exponentOffset;
+        } else if (m_input->peek(exponentOffset) == '-') {
+            candidateSign = -1;
+            ++exponentOffset;
+        }
+        if (isASCIIDigit(m_input->peek(exponentOffset))) {
+            exponentSign = candidateSign;
+            exponentStartPos = exponentOffset;
+            peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(exponentOffset);
+            exponentEndPos = peekOffset;
+        }
+    }
+
+    unsigned integerPart = m_input->getUnsignedInt(intStartPos, intEndPos);
+    unsigned fractionPart = m_input->getUnsignedInt(fractionStartPos, fractionEndPos);
+    unsigned fractionDigits = fractionEndPos - fractionStartPos;
+    unsigned exponentPart = m_input->getUnsignedInt(exponentStartPos, exponentEndPos);
+
+    // Convert to double before negating: negating or multiplying the
+    // unsigned counts by -1 would wrap around to a huge positive exponent.
+    double fractionScale = pow(10.0, -static_cast<double>(fractionDigits));
+    double exponentScale = pow(10.0, exponentSign * static_cast<double>(exponentPart));
+    double value = sign * (integerPart + fractionPart * fractionScale) * exponentScale;
+
+    m_input->advance(peekOffset);
+    // FIXME - Always returning an Integer type. Need to look at fractions, etc.
+    return CSSToken(NumberToken, repr, value, type);
+}

+// Consumes a number and then classifies it: a following identifier turns it
+// into a dimension with that unit, a following '%' turns it into a
+// percentage, and otherwise the plain number token is returned.
+// http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token
+CSSToken NewCSSTokenizer::consumeNumericToken()
+{
+    CSSToken number = consumeNumber();
+    if (nextCharsAreIdentifier()) {
+        number.convertToDimensionWithUnit(consumeName());
+        return number;
+    }
+    if (consumeIfNext('%'))
+        number.convertToPercentage();
+    return number;
+}

+// Consumes a name and decides what kind of token it forms: a name followed
+// by '(' is a function (or a URL token when the name is "url", compared
+// case-insensitively); any other name is a plain identifier.
+// http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token
+CSSToken NewCSSTokenizer::consumeIdentLikeToken()
+{
+    String identifier = consumeName();
+    if (!consumeIfNext('('))
+        return CSSToken(IdentToken, identifier);
+    if (equalIgnoringCase(identifier, "url"))
+        return consumeURLToken();
+    return CSSToken(FunctionToken, identifier);
+}

+// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token

+// Placeholder for string tokenization. nextToken() calls this with the
+// opening quote character as |endingCodePoint|. The parameter is currently
+// unused: nothing is consumed and a BadStringToken is returned for every
+// input.
+CSSToken NewCSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint)

+ // FIXME: Implement.

+ return CSSToken(BadStringToken);

+// http://www.w3.org/TR/css3-syntax/#consume-a-url-token

+// Placeholder for URL tokenization. consumeIdentLikeToken() calls this after
+// it has consumed "url(". Nothing further is consumed here and a BadURLToken
+// is returned for every input.
+CSSToken NewCSSTokenizer::consumeURLToken()

+ // FIXME: Implement.

+ return CSSToken(BadURLToken);

+// Consumes characters for as long as the current character is CSS
+// whitespace. This uses isCSSSpace(), the same predicate nextToken() uses to
+// detect the start of a whitespace run, so the two can never disagree about
+// which characters belong to a single WhitespaceToken.
+void NewCSSTokenizer::consumeUntilNotWhitespace()
+{
+    while (isCSSSpace(m_input->currentInputChar()))
+        consume();
+}

+// Skips the remainder of a comment: consumes characters up to and including
+// the first "*/", or stops at the end-of-file marker when the comment is
+// never closed. The end-of-file marker itself is left in the stream so the
+// caller's next read still sees it.
+void NewCSSTokenizer::consumeThroughCommentEndOrUntilEOF()
+{
+    while (m_input->currentInputChar() != kEndOfFileMarker) {
+        if (consume() != '*')
+            continue;
+        if (m_input->currentInputChar() == '/') {
+            consume();
+            return;
+        }
+    }
+}

+// If the current input character equals |character|, consumes it and returns
+// true. Otherwise leaves the stream untouched and returns false.
+bool NewCSSTokenizer::consumeIfNext(UChar character)
+{
+    if (m_input->currentInputChar() != character)
+        return false;
+    // Returning true without consuming would leave the matched character in
+    // the stream, where it would be tokenized a second time.
+    consume();
+    return true;
+}

+// If the upcoming input characters spell out |str| exactly, consumes all of
+// them and returns true. Otherwise leaves the stream untouched and returns
+// false. An empty |str| always matches and consumes nothing.
+bool NewCSSTokenizer::consumeIfNext(String str)
+{
+    for (unsigned i = 0; i < str.length(); ++i) {
+        if (str[i] != m_input->peek(i))
+            return false;
+    }
+    // The comparison above only peeks; the matched characters still have to
+    // be removed from the stream.
+    m_input->advance(str.length());
+    return true;
+}

+// Consumes the longest run of name characters and valid escapes at the
+// current position and returns it as a string (empty when the input does not
+// start with either).
+// http://www.w3.org/TR/css3-syntax/#consume-a-name
+String NewCSSTokenizer::consumeName()
+{
+    // FIXME: This is written to match the spec
+    // but could be much more efficient.
+    String name("");
+    for (;;) {
+        if (isNameChar(m_input->currentInputChar())) {
+            name.append(consume());
+        } else if (nextTwoCharsAreValidEscape()) {
+            consume(); // SPEC BUG: Emailed Tab.
+            name.append(consumeEscape());
+        } else {
+            return name;
+        }
+    }
+}

+// Consumes the part of an escape that follows the backslash and returns the
+// character it denotes.
+//  - One to six hex digits, optionally followed by a single whitespace
+//    character, denote the code point with that hexadecimal value.
+//  - The end-of-file marker yields the replacement character.
+//  - Any other character stands for itself.
+// Hex values that are zero, a surrogate, or too large for the UChar return
+// type (assumed here to be a 16-bit code unit) yield the replacement
+// character instead of being truncated.
+// http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point
+UChar NewCSSTokenizer::consumeEscape()
+{
+    UChar cc = consume();
+    ASSERT(cc != '\n');
+    if (isASCIIHexDigit(cc)) {
+        String hexChars;
+        hexChars.append(cc);
+        unsigned consumedHexDigits = 1;
+        // Peek before consuming so the character that ends the hex run is
+        // not swallowed along with it.
+        while (consumedHexDigits < 6 && isASCIIHexDigit(m_input->currentInputChar())) {
+            hexChars.append(consume());
+            ++consumedHexDigits;
+        }
+        // A single whitespace character directly after the digits terminates
+        // the escape and is part of it.
+        if (isCSSSpace(m_input->currentInputChar()))
+            consume();
+        bool ok = false;
+        unsigned codePoint = hexChars.toUIntStrict(&ok, 16);
+        bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
+        if (!ok || !codePoint || isSurrogate || codePoint > 0xFFFF)
+            return WTF::Unicode::replacementCharacter;
+        return static_cast<UChar>(codePoint);
+    }
+    if (cc == kEndOfFileMarker)
+        return WTF::Unicode::replacementCharacter;
+    return cc;
+}

+// Returns true when the current (not yet consumed) input character is a name
+// character as defined by isNameChar(). Does not consume anything.
+bool NewCSSTokenizer::nextCharIsNameChar()

+ return isNameChar(m_input->currentInputChar());

+// Returns true when the next two unconsumed input characters form a valid
+// escape: a backslash followed by something other than a newline or the
+// end-of-file marker. Does not consume anything.
+bool NewCSSTokenizer::nextTwoCharsAreValidEscape()
+{
+    // The first unconsumed character is currentInputChar() and the one after
+    // it is peek(1). Testing peek(1)/peek(2) would look one character too
+    // far and miss a backslash at the current position, which is the one
+    // consumeName() goes on to consume.
+    return twoCharsAreValidEscape(m_input->currentInputChar(), m_input->peek(1));
+}

+// Returns true when the unconsumed input starts with a number: a digit; a
+// sign followed by a digit or by ".digit"; or a '.' followed by a digit.
+// Does not consume anything.
+// http://www.w3.org/TR/css3-syntax/#starts-with-a-number
+bool NewCSSTokenizer::nextCharsAreNumber()
+{
+    const UChar first = m_input->currentInputChar();
+    const UChar second = m_input->peek(1);
+    if (isASCIIDigit(first))
+        return true;
+    switch (first) {
+    case '+':
+    case '-':
+        if (isASCIIDigit(second))
+            return true;
+        return second == '.' && isASCIIDigit(m_input->peek(2));
+    case '.':
+        return isASCIIDigit(second);
+    default:
+        return false;
+    }
+}

+// Returns true when the unconsumed input would start an identifier: the
+// current character is a name-start character, nextTwoCharsAreValidEscape()
+// holds, or the current character is '-' and is followed by a name-start
+// character or a valid escape. Does not consume anything.
+// http://www.w3.org/TR/css3-syntax/#would-start-an-identifier
+bool NewCSSTokenizer::nextCharsAreIdentifier()
+{
+    const UChar firstChar = m_input->currentInputChar();
+    if (isNameStart(firstChar) || nextTwoCharsAreValidEscape())
+        return true;
+    if (firstChar != '-')
+        return false;
+    return isNameStart(m_input->peek(1))
+        || twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));
+}

+} // namespace WebCore

« Source/core/css/parser/MediaQueryParserTest.cpp ('K') | « Source/core/css/parser/NewCSSTokenizer.h ('k') | Source/core/css/parser/NewCSSTokenizerTest.cpp » ('j') | no next file with comments »