Source/core/css/parser/NewCSSTokenizer.cpp - Issue 171383002: A thread-safe Media Query Parser

Side by Side Diff: Source/core/css/parser/NewCSSTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (C) 2013 Google Inc. All rights reserved.

	3 *

	4 * Redistribution and use in source and binary forms, with or without

	5 * modification, are permitted provided that the following conditions are

	6 * met:

	7 *

	8 * * Redistributions of source code must retain the above copyright

	9 * notice, this list of conditions and the following disclaimer.

	10 * * Redistributions in binary form must reproduce the above

	11 * copyright notice, this list of conditions and the following disclaimer

	12 * in the documentation and/or other materials provided with the

	13 * distribution.

	14 * * Neither the name of Google Inc. nor the names of its

	15 * contributors may be used to endorse or promote products derived from

	16 * this software without specific prior written permission.

	17 *

	18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	29 */

	30

	31 #include "config.h"

	32 #include "core/css/parser/NewCSSTokenizer.h"

	33

	34 #include "core/css/parser/CSSInputStream.h"

	35 #include "core/css/parser/CSSParserIdioms.h"

	36 #include "platform/text/SegmentedString.h"

	37 #include "wtf/TemporaryChange.h"

	38 #include "wtf/unicode/CharacterNames.h"

	39

	40 namespace WebCore {

	41

	42 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point

	43 static bool isNameStart(UChar c)

	44 {

	45 if (isASCIIAlpha(c))

	46 return true;

	47 if (c == '_')

	48 return true;

	49 return !isASCII(c);

	50 }

	51

	52 // http://www.w3.org/TR/css-syntax-3/#name-code-point

	53 static bool isNameChar(UChar c)

	54 {

	55 return isNameStart(c) \|\| isASCIIDigit(c) \|\| c == '-';

	56 }

	57

	58 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e

	59 static bool twoCharsAreValidEscape(UChar first, UChar second)

	60 {

	61 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ;

	62 }

	63

	64 NewCSSTokenizer::NewCSSTokenizer()

	65 {

	66 }

	67

	68 void NewCSSTokenizer::reconsume(UChar c)

	69 {

	70 m_input->pushBack(c);

	71 }

	72

	73 UChar NewCSSTokenizer::consume()

	74 {

	75 UChar current = m_input->currentInputChar();

	76 m_input->advance();

	77 return current;

	78 }

	79

	80 void NewCSSTokenizer::tokenize(String string, Vector<CSSToken>& outTokens)

	81 {

	82 NewCSSTokenizer tokenizer;

	83 CSSInputStream input(string);

	84 while (true) {

	85 outTokens.append(tokenizer.nextToken(input));

	86 if (outTokens.last().type() == EOFToken)

	87 return;

	88 }

	89 }

	90

	91 CSSToken NewCSSTokenizer::nextToken(CSSInputStream& input)

	92 {

	93 // Unlike the HTMLTokenizer, the CSS Syntax spec is written

	94 // as a stateless, (fixed-size) look-ahead tokenizer.

	95 // We could move to the stateful model and instead create

	96 // states for all the "next 3 codepoints are X" cases.

	97 // State-machine tokenizers are easier to write to handle

	98 // incremental tokenization of partial sources.

	99 // However, for now we follow the spec exactly.

	100 m_input = &input;

	101 UChar cc = consume();

	102

	103 if (isCSSSpace(cc)) {

	104 // CSS Tokenization is currently lossy, but we could record

	105 // the exact whitespace instead of discarding it here.

	106 consumeUntilNotWhitespace();

	107 return CSSToken(WhitespaceToken);

	108 }

	109 if (cc == '\"' \|\| cc == '\'')

	110 return consumeStringTokenUntil(cc);

	111 if (cc == '#') {

	112 if (nextCharIsNameChar() \|\| nextTwoCharsAreValidEscape()) {

	113 HashTokenType hashType = UnrestrictedHashToken;

	114 if (nextCharsAreIdentifier())

	115 hashType = IdHashToken;

	116 return CSSToken(HashToken, consumeName(), hashType);

	117 }

	118 return CSSToken(DelimToken, cc);

	119 }

	120 if (cc == '$') {

	121 if (consumeIfNext('='))

	122 return CSSToken(SuffixMatchToken);

	123 return CSSToken(DelimToken, cc);

	124 }

	125 if (cc == '(')

	126 return CSSToken(LeftParenToken);

	127 if (cc == ')')

	128 return CSSToken(RightParenToken);

	129 if (cc == '*') {

	130 if (consumeIfNext('='))

	131 return CSSToken(SubstringMatchToken);

	132 return CSSToken(DelimToken, cc);

	133 }

	134 if (cc == '+' \|\| cc == '.') {

	135 if (nextCharsAreNumber()) {

	136 reconsume(cc);

	137 return consumeNumericToken();

	138 }

	139 return CSSToken(DelimToken, cc);

	140 }

	141 if (cc == ',')

	142 return CSSToken(CommaToken);

	143 if (cc == '-') {

	144 if (nextCharsAreNumber()) {

	145 reconsume(cc);

	146 return consumeNumericToken();

	147 }

	148 if (nextCharsAreIdentifier()) {

	149 reconsume(cc);

	150 return consumeIdentLikeToken();

	151 }

	152 if (consumeIfNext("->"))

	153 return CSSToken(CDCToken);

	154 return CSSToken(DelimToken, cc);

	155 }

	156 if (cc == '/') {

	157 if (consumeIfNext('*')) {

	158 consumeThroughCommentEndOrUntilEOF();

	159 return nextToken(*m_input);

	160 }

	161 return CSSToken(DelimToken, cc);

	162 }

	163 if (cc == ':')

	164 return CSSToken(ColonToken);

	165 if (cc == ';')

	166 return CSSToken(SemicolonToken);

	167 if (cc == '<') {

	168 if (consumeIfNext("!--"))

	169 return CSSToken(CDOToken);

	170 return CSSToken(DelimToken, cc);

	171 }

	172 if (cc == '@') {

	173 if (nextCharsAreIdentifier())

	174 return CSSToken(AtKeywordToken, consumeName());

	175 return CSSToken(DelimToken, cc);

	176 }

	177 if (cc == '[')

	178 return CSSToken(LeftBracketToken);

	179 if (cc == '\\') {

	180 if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) {

	181 reconsume(cc);

	182 return consumeIdentLikeToken();

	183 }

	184 return CSSToken(DelimToken, cc);

	185 }

	186 if (cc == ']')

	187 return CSSToken(RightBracketToken);

	188 if (cc == '^') {

	189 if (consumeIfNext('='))

	190 return CSSToken(PrefixMatchToken);

	191 return CSSToken(DelimToken, cc);

	192 }

	193 if (cc == '{')

	194 return CSSToken(LeftBraceToken);

	195 if (cc == '{')

	196 return CSSToken(RightBraceToken);

	197 if (isASCIIDigit(cc)) {

	198 // "reconsume" here is not according to spec, but required AFAICT.

	199 // https://www.w3.org/Bugs/Public/show_bug.cgi?id=24661

	200 reconsume(cc);

	201 return consumeNumericToken();

	202 }

	203 // if (cc == 'U' \|\| cc == 'u') {

	204 // // U+0055 LATIN CAPITAL LETTER U (U)

	205 // // U+0075 LATIN SMALL LETTER U (u)

	206 // // If the next 2 input code points are U+002B PLUS SIGN (+) followed by a hex digit or U+003F QUESTION MARK (?), consume the next input code point. N ote: don’t consume both of them. Consume a unicode-range token and return it.

	207 // // Otherwise, reconsume the current input code point, consume an iden t-like token, and return it.

	208 // reconsume(cc);

	209 // return consumeIdentLikeToken();

	210 // }

	211 if (isNameStart(cc)) {

	212 reconsume(cc);

	213 return consumeIdentLikeToken();

	214 }

	215 if (cc == '\|') {

	216 if (consumeIfNext('='))

	217 return CSSToken(DashMatchToken);

	218 if (consumeIfNext('\|'))

	219 return CSSToken(ColumnToken);

	220 return CSSToken(DelimToken, cc);

	221 }

	222 if (cc == '~') {

	223 if (consumeIfNext('='))

	224 return CSSToken(IncludeMatchToken);

	225 return CSSToken(DelimToken, cc);

	226 }

	227 if (cc == kEndOfFileMarker)

	228 return CSSToken(EOFToken);

	229 return CSSToken(DelimToken, cc);

	230 }

	231

	232 // This method merges the following spec sections for efficiency

	233 // http://www.w3.org/TR/css3-syntax/#consume-a-number

	234 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number

	235 CSSToken NewCSSTokenizer::consumeNumber()

	236 {

	237 ASSERT(nextCharsAreNumber());

	238 // FIXME - repr should get the value as a string, even though I'm not sure i t's useful

	239 String repr;

	240 NumericValueType type = IntegerValueType;

	241 double value = 0;

	242 int sign = 1;

	243 unsigned peekOffset = 0;

	244 int exponentSign = 1;

	245 unsigned exponentStartPos = 0;

	246 unsigned exponentEndPos = 0;

	247 unsigned fractionStartPos = 0;

	248 unsigned fractionEndPos = 0;

	249 unsigned integerPart;

	250 unsigned fractionPart;

	251 unsigned fractionDigits;

	252 unsigned exponentPart;

	253 if (m_input->currentInputChar() == '+') {

	254 ++peekOffset;

	255 } else if (m_input->peek(peekOffset) == '-') {

	256 sign = -1;

	257 ++peekOffset;

	258 }

	259 unsigned intStartPos = peekOffset;

	260 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);

	261 unsigned intEndPos = peekOffset;

	262 if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(++peekOff set))) {

	263 fractionStartPos = peekOffset;

	264 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);

	265 fractionEndPos = peekOffset;

	266 }

	267 if ((m_input->peek(peekOffset) == 'E' \|\| m_input->peek(peekOffset) == 'e')) {

	268 ++peekOffset;

	269 if (m_input->peek(peekOffset) == '+') {

	270 ++peekOffset;

	271 } else if (m_input->peek(peekOffset) =='-') {

	272 exponentSign = -1;

	273 ++peekOffset;

	274 }

	275 exponentStartPos = peekOffset;

	276 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);

	277 exponentEndPos = peekOffset;

	278 }

	279 integerPart = m_input->getUnsignedInt(intStartPos, intEndPos);

	280 fractionPart = m_input->getUnsignedInt(fractionStartPos, fractionEndPos);

	281 fractionDigits = fractionEndPos - fractionStartPos;

	282 exponentPart = m_input->getUnsignedInt(exponentStartPos, exponentEndPos);

	283 value = sign * (integerPart + fractionPart * pow(10, -1 * fractionDigits)) * pow(10, exponentSign * exponentPart);

	284

	285 m_input->advance(peekOffset);

	286 // FIXME - Always returning an Integer type. Need to look at fractions, etc.

	287

	288 return CSSToken(NumberToken, repr, value, type);

	289 }

	290

	291 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token

	292 CSSToken NewCSSTokenizer::consumeNumericToken()

	293 {

	294 CSSToken token = consumeNumber();

	295 if (nextCharsAreIdentifier())

	296 token.convertToDimensionWithUnit(consumeName());

	297 else if (consumeIfNext('%'))

	298 token.convertToPercentage();

	299 return token;

	300 }

	301

	302 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token

	303 CSSToken NewCSSTokenizer::consumeIdentLikeToken()

	304 {

	305 String name = consumeName();

	306 if (consumeIfNext('(')) {

	307 if (equalIgnoringCase(name, "url"))

	308 return consumeURLToken();

	309 return CSSToken(FunctionToken, name);

	310 }

	311 return CSSToken(IdentToken, name);

	312 }

	313

	314 // http://dev.w3.org/csswg/css-syntax/#consume-a-string-token

	315 CSSToken NewCSSTokenizer::consumeStringTokenUntil(UChar endingCodePoint)

	316 {

	317 // FIXME: Implement.

	318 return CSSToken(BadStringToken);

	319 }

	320

	321 // http://www.w3.org/TR/css3-syntax/#consume-a-url-token

	322 CSSToken NewCSSTokenizer::consumeURLToken()

	323 {

	324 // FIXME: Implement.

	325 return CSSToken(BadURLToken);

	326 }

	327

	328 void NewCSSTokenizer::consumeUntilNotWhitespace()

	329 {

	330 while (m_input->currentInputChar() == '\t' \|\| m_input->currentInputChar() == ' ' \|\| m_input->currentInputChar() == '\n')

	331 consume();

	332 }

	333

	334 void NewCSSTokenizer::consumeThroughCommentEndOrUntilEOF()

	335 {

	336 // FIXME: Implement.

	337 }

	338

	339 bool NewCSSTokenizer::consumeIfNext(UChar character)

	340 {

	341 return (m_input->currentInputChar() == character);

	342 }

	343

	344 bool NewCSSTokenizer::consumeIfNext(String str)

	345 {

	346 for (unsigned i = 0; i < str.length(); ++i) {

	347 if (str[i] != m_input->peek(i))

	348 return false;

	349 }

	350 return true;

	351 }

	352

	353 // http://www.w3.org/TR/css3-syntax/#consume-a-name

	354 String NewCSSTokenizer::consumeName()

	355 {

	356 // FIXME: This is written to match the spec

	357 // but could be much more efficient.

	358 String result("");

	359 while (true) {

	360 if (isNameChar(m_input->currentInputChar())) {

	361 result.append(consume());

	362 continue;

	363 }

	364 if (nextTwoCharsAreValidEscape()) {

	365 consume(); // SPEC BUG: Emailed Tab.

	366 result.append(consumeEscape());

	367 continue;

	368 }

	369 return result;

	370 }

	371 }

	372

	373 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point

	374 UChar NewCSSTokenizer::consumeEscape()

	375 {

	376 UChar cc = consume();

	377 ASSERT(cc != '\n');

	378 if (isASCIIHexDigit(cc)) {

	379 unsigned consumedHexDigits = 1;

	380 String hexChars;

	381 do {

	382 hexChars.append(cc);

	383 cc = consume();

	384 consumedHexDigits++;

	385 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc));

	386 bool ok = false;

	387 UChar codePoint = hexChars.toUIntStrict(&ok, 16);

	388 if (!ok)

	389 return WTF::Unicode::replacementCharacter;

	390 return codePoint;

	391 }

	392 if (cc == kEndOfFileMarker)

	393 return WTF::Unicode::replacementCharacter;

	394 return cc;

	395 }

	396

	397 bool NewCSSTokenizer::nextCharIsNameChar()

	398 {

	399 return isNameChar(m_input->currentInputChar());

	400 }

	401

	402 bool NewCSSTokenizer::nextTwoCharsAreValidEscape()

	403 {

	404 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));

	405 }

	406

	407 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number

	408 bool NewCSSTokenizer::nextCharsAreNumber()

	409 {

	410 UChar first = m_input->currentInputChar();

	411 UChar second = m_input->peek(1);

	412 if (isASCIIDigit(first))

	413 return true;

	414 if (first == '+' \|\| first == '-')

	415 return ((isASCIIDigit(second)) \|\| (second == '.' && isASCIIDigit(m_input ->peek(2))));

	416 if (first =='.')

	417 return (isASCIIDigit(second));

	418 return false;

	419 }

	420

	421 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier

	422 bool NewCSSTokenizer::nextCharsAreIdentifier()

	423 {

	424 UChar firstChar = m_input->currentInputChar();

	425 if (isNameStart(firstChar) \|\| nextTwoCharsAreValidEscape())

	426 return true;

	427

	428 if (firstChar == '-') {

	429 if (isNameStart(m_input->peek(1)))

	430 return true;

	431 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));

	432 }

	433

	434 return false;

	435 }

	436

	437 } // namespace WebCore

OLD	NEW

« Source/core/css/parser/MediaQueryParserTest.cpp ('K') | « Source/core/css/parser/NewCSSTokenizer.h ('k') | Source/core/css/parser/NewCSSTokenizerTest.cpp » ('j') | no next file with comments »