Source/core/css/parser/MediaQueryTokenizer.cpp - Issue 171383002: A thread-safe Media Query Parser

Side by Side Diff: Source/core/css/parser/MediaQueryTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Rebased and fixed float parsing Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (C) 2013 Google Inc. All rights reserved.

	3 *

	4 * Redistribution and use in source and binary forms, with or without

	5 * modification, are permitted provided that the following conditions are

	6 * met:

	7 *

	8 * * Redistributions of source code must retain the above copyright

	9 * notice, this list of conditions and the following disclaimer.

	10 * * Redistributions in binary form must reproduce the above

	11 * copyright notice, this list of conditions and the following disclaimer

	12 * in the documentation and/or other materials provided with the

	13 * distribution.

	14 * * Neither the name of Google Inc. nor the names of its

	15 * contributors may be used to endorse or promote products derived from

	16 * this software without specific prior written permission.

	17 *

	18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	29 */

	30

	31 #include "config.h"

	32 #include "core/css/parser/MediaQueryTokenizer.h"

	33

	34 #include "core/css/parser/CSSInputStream.h"

	35 #include "core/html/parser/HTMLParserIdioms.h"

	36 #include "wtf/unicode/CharacterNames.h"

	37

	38 namespace WebCore {

	39

	40 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point

	41 static bool isNameStart(UChar c)

	42 {

	43 if (isASCIIAlpha(c))

	44 return true;

	45 if (c == '_')

	46 return true;

	47 return !isASCII(c);

	48 }

	49

	50 // http://www.w3.org/TR/css-syntax-3/#name-code-point

	51 static bool isNameChar(UChar c)

	52 {

	53 return isNameStart(c) \|\| isASCIIDigit(c) \|\| c == '-';

	54 }

	55

	56 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e

	57 static bool twoCharsAreValidEscape(UChar first, UChar second)

	58 {

	59 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ;

	60 }

	61

	62 MediaQueryTokenizer::MediaQueryTokenizer()

	63 {

	64 }

	65

	66 void MediaQueryTokenizer::reconsume(UChar c)

	67 {

	68 m_input->pushBack(c);

	69 }

	70

	71 UChar MediaQueryTokenizer::consume()

	72 {

	73 UChar current = m_input->currentInputChar();

	74 m_input->advance();

	75 return current;

	76 }

	77

	78 void MediaQueryTokenizer::consume(unsigned offset)

	79 {

	80 m_input->advance(offset);

	81 }

	82

	83 CSSToken MediaQueryTokenizer::whiteSpace(UChar cc)

	84 {

	85 // CSS Tokenization is currently lossy, but we could record

	86 // the exact whitespace instead of discarding it here.

	87 consumeUntilNotWhitespace();

	88 return CSSToken(WhitespaceToken);

	89 }

	90

	91 CSSToken MediaQueryTokenizer::leftParen(UChar cc)

	92 {

	93 return CSSToken(LeftParenToken);

	94 }

	95

	96 CSSToken MediaQueryTokenizer::rightParen(UChar cc)

	97 {

	98 return CSSToken(RightParenToken);

	99 }

	100

	101 CSSToken MediaQueryTokenizer::plusOrFullStop(UChar cc)

	102 {

	103 if (nextCharsAreNumber()) {

	104 reconsume(cc);

	105 return consumeNumericToken();

	106 }

	107 return CSSToken(DelimToken, cc);

	108 }

	109

	110 CSSToken MediaQueryTokenizer::comma(UChar cc)

	111 {

	112 return CSSToken(CommaToken);

	113 }

	114

	115 CSSToken MediaQueryTokenizer::hyphenMinus(UChar cc)

	116 {

	117 if (nextCharsAreNumber()) {

	118 reconsume(cc);

	119 return consumeNumericToken();

	120 }

	121 if (nextCharsAreIdentifier()) {

	122 reconsume(cc);

	123 return consumeIdentLikeToken();

	124 }

	125 return CSSToken(DelimToken, cc);

	126 }

	127

	128 CSSToken MediaQueryTokenizer::solidus(UChar cc)

	129 {

	130 return CSSToken(DelimToken, cc);

	131 }

	132

	133 CSSToken MediaQueryTokenizer::colon(UChar cc)

	134 {

	135 return CSSToken(ColonToken);

	136 }

	137

	138 CSSToken MediaQueryTokenizer::semiColon(UChar cc)

	139 {

	140 return CSSToken(SemicolonToken);

	141 }

	142

	143 CSSToken MediaQueryTokenizer::reverseSolidus(UChar cc)

	144 {

	145 if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) {

	146 reconsume(cc);

	147 return consumeIdentLikeToken();

	148 }

	149 return CSSToken(DelimToken, cc);

	150 }

	151

	152 CSSToken MediaQueryTokenizer::asciiDigit(UChar cc)

	153 {

	154 reconsume(cc);

	155 return consumeNumericToken();

	156 }

	157

	158 CSSToken MediaQueryTokenizer::nameStart(UChar cc)

	159 {

	160 reconsume(cc);

	161 return consumeIdentLikeToken();

	162 }

	163

	164 CSSToken MediaQueryTokenizer::endOfFile(UChar cc)

	165 {

	166 return CSSToken(EOFToken);

	167 }

	168

	169 void MediaQueryTokenizer::tokenize(String string, Vector<CSSToken>& outTokens)

	170 {

	171 MediaQueryTokenizer tokenizer;

	172 // According to the spec, we should perform preprocessing here.

	173 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing

	174 //

	175 // However, we can skip this step since:

	176 // * We're using HTML spaces (which accept \r and \f as a valid white space)

	177 // * Do not count white spaces

	178 // * consumeEscape replaces NULLs for replacement characters

	179

	180 CSSInputStream input(string);

	181 while (true) {

	182 outTokens.append(tokenizer.nextToken(input));

	183 if (outTokens.last().type() == EOFToken)

	184 return;

	185 }

	186 }

	187

	188 CSSToken MediaQueryTokenizer::nextToken(CSSInputStream& input)

	189 {

	190 // Unlike the HTMLTokenizer, the CSS Syntax spec is written

	191 // as a stateless, (fixed-size) look-ahead tokenizer.

	192 // We could move to the stateful model and instead create

	193 // states for all the "next 3 codepoints are X" cases.

	194 // State-machine tokenizers are easier to write to handle

	195 // incremental tokenization of partial sources.

	196 // However, for now we follow the spec exactly.

	197 m_input = &input;

	198 UChar cc = consume();

	199 CodePoint codePointFunc = 0;

	200

	201 if (isASCII(cc)) {

	202 ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM);

	203 codePointFunc = getCodePoints()->codePoints[cc];

	204 } else {

	205 codePointFunc = &MediaQueryTokenizer::nameStart;

	206 }

	207

	208 if (codePointFunc)

	209 return ((this)->*(codePointFunc))(cc);

	210

	211 return CSSToken(DelimToken, cc);

	212 }

	213

	214 // This method merges the following spec sections for efficiency

	215 // http://www.w3.org/TR/css3-syntax/#consume-a-number

	216 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number

	217 CSSToken MediaQueryTokenizer::consumeNumber()

	218 {

	219 ASSERT(nextCharsAreNumber());

	220 NumericValueType type = IntegerValueType;

	221 double value = 0;

	222 int sign = 1;

	223 unsigned peekOffset = 0;

	224 int exponentSign = 1;

	225 unsigned exponentStartPos = 0;

	226 unsigned exponentEndPos = 0;

	227 unsigned fractionStartPos = 0;

	228 unsigned fractionEndPos = 0;

	229 unsigned long long integerPart;

	230 double fractionPart;

	231 unsigned fractionDigits;

	232 unsigned long long exponentPart;

	233 if (m_input->currentInputChar() == '+') {

	234 ++peekOffset;

	235 } else if (m_input->peek(peekOffset) == '-') {

	236 sign = -1;

	237 ++peekOffset;

	238 }

	239 unsigned intStartPos = peekOffset;

	240 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);

	241 unsigned intEndPos = peekOffset;

	242 if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(++peekOff set))) {

	243 fractionStartPos = peekOffset;

	244 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);

	245 fractionEndPos = peekOffset;

	246 }

	247 if ((m_input->peek(peekOffset) == 'E' \|\| m_input->peek(peekOffset) == 'e')) {

	248 int peekOffsetBeforeExponent = peekOffset;

	249 ++peekOffset;

	250 if (m_input->peek(peekOffset) == '+') {

	251 ++peekOffset;

	252 } else if (m_input->peek(peekOffset) =='-') {

	253 exponentSign = -1;

	254 ++peekOffset;

	255 }

	256 exponentStartPos = peekOffset;

	257 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);

	258 exponentEndPos = peekOffset;

	259 if (exponentEndPos == exponentStartPos)

	260 peekOffset = peekOffsetBeforeExponent;

	261 }

	262 integerPart = m_input->getUInt(intStartPos, intEndPos);

	263 fractionDigits = fractionEndPos - fractionStartPos;

	264 unsigned floatingFractionEndPos = fractionEndPos;

	265 fractionPart = m_input->getDouble(--fractionStartPos, floatingFractionEndPos );

	266 exponentPart = m_input->getUInt(exponentStartPos, exponentEndPos);

	267 double exponent = pow(10, (float)exponentSign * (double)exponentPart);

	268 value = (double)sign * ((double)integerPart + fractionPart) * exponent;

	269

	270 m_input->advance(peekOffset);

	271 if (fractionDigits > 0)

	272 type = NumberValueType;

	273

	274 return CSSToken(NumberToken, value, type);

	275 }

	276

	277 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token

	278 CSSToken MediaQueryTokenizer::consumeNumericToken()

	279 {

	280 CSSToken token = consumeNumber();

	281 if (nextCharsAreIdentifier())

	282 token.convertToDimensionWithUnit(consumeName());

	283 else if (consumeIfNext('%'))

	284 token.convertToPercentage();

	285 return token;

	286 }

	287

	288 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token

	289 CSSToken MediaQueryTokenizer::consumeIdentLikeToken()

	290 {

	291 String name = consumeName();

	292 if (consumeIfNext('('))

	293 return CSSToken(FunctionToken, name);

	294 return CSSToken(IdentToken, name);

	295 }

	296

	297 void MediaQueryTokenizer::consumeUntilNotWhitespace()

	298 {

	299 // Using HTML space here rather than CSS space since we don't do preprocessi ng

	300 while (isHTMLSpace<UChar>(m_input->currentInputChar()))

	301 consume();

	302 }

	303

	304 bool MediaQueryTokenizer::consumeIfNext(UChar character)

	305 {

	306 if (m_input->currentInputChar() == character) {

	307 consume();

	308 return true;

	309 }

	310 return false;

	311 }

	312

	313 // http://www.w3.org/TR/css3-syntax/#consume-a-name

	314 String MediaQueryTokenizer::consumeName()

	315 {

	316 // FIXME: Is this as efficient as it can be?

	317 // The possibility of escape chars mandates a copy AFAICT.

	318 Vector<UChar> result;

	319 while (true) {

	320 if (isNameChar(m_input->currentInputChar())) {

	321 result.append(consume());

	322 continue;

	323 }

	324 if (nextTwoCharsAreValidEscape()) {

	325 // "consume()" fixes a spec bug.

	326 // The first code point should be consumed before consuming the esca ped code point.

	327 consume();

	328 result.append(consumeEscape());

	329 continue;

	330 }

	331 return String(result);

	332 }

	333 }

	334

	335 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point

	336 UChar MediaQueryTokenizer::consumeEscape()

	337 {

	338 UChar cc = consume();

	339 ASSERT(cc != '\n');

	340 if (isASCIIHexDigit(cc)) {

	341 unsigned consumedHexDigits = 1;

	342 String hexChars;

	343 do {

	344 hexChars.append(cc);

	345 cc = consume();

	346 consumedHexDigits++;

	347 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc));

	348 bool ok = false;

	349 UChar codePoint = hexChars.toUIntStrict(&ok, 16);

	350 if (!ok)

	351 return WTF::Unicode::replacementCharacter;

	352 return codePoint;

	353 }

	354

	355 // Replaces NULLs with replacement characters, since we do not perform prepr ocessing

	356 if (cc == kEndOfFileMarker)

	357 return WTF::Unicode::replacementCharacter;

	358 return cc;

	359 }

	360

	361 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()

	362 {

	363 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));

	364 }

	365

	366 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number

	367 bool MediaQueryTokenizer::nextCharsAreNumber()

	368 {

	369 UChar first = m_input->currentInputChar();

	370 UChar second = m_input->peek(1);

	371 if (isASCIIDigit(first))

	372 return true;

	373 if (first == '+' \|\| first == '-')

	374 return ((isASCIIDigit(second)) \|\| (second == '.' && isASCIIDigit(m_input ->peek(2))));

	375 if (first =='.')

	376 return (isASCIIDigit(second));

	377 return false;

	378 }

	379

	380 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier

	381 bool MediaQueryTokenizer::nextCharsAreIdentifier()

	382 {

	383 UChar firstChar = m_input->currentInputChar();

	384 if (isNameStart(firstChar) \|\| nextTwoCharsAreValidEscape())

	385 return true;

	386

	387 if (firstChar == '-') {

	388 if (isNameStart(m_input->peek(1)))

	389 return true;

	390 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));

	391 }

	392

	393 return false;

	394 }

	395

	396 } // namespace WebCore

OLD	NEW

« Source/core/css/parser/CSSToken.h ('K') | « Source/core/css/parser/MediaQueryTokenizer.h ('k') | Source/core/css/parser/MediaQueryTokenizerTest.cpp » ('j') | no next file with comments »