Source/core/css/parser/MediaQueryTokenizer.cpp - Issue 171383002: A thread-safe Media Query Parser

Side by Side Diff: Source/core/css/parser/MediaQueryTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Another attempt to fix Android build issues Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (C) 2013 Google Inc. All rights reserved.

	3 *

	4 * Redistribution and use in source and binary forms, with or without

	5 * modification, are permitted provided that the following conditions are

	6 * met:

	7 *

	8 * * Redistributions of source code must retain the above copyright

	9 * notice, this list of conditions and the following disclaimer.

	10 * * Redistributions in binary form must reproduce the above

	11 * copyright notice, this list of conditions and the following disclaimer

	12 * in the documentation and/or other materials provided with the

	13 * distribution.

	14 * * Neither the name of Google Inc. nor the names of its

	15 * contributors may be used to endorse or promote products derived from

	16 * this software without specific prior written permission.

	17 *

	18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	29 */

	30

	31 #include "config.h"

	32 #include "core/css/parser/MediaQueryTokenizer.h"

	33

	34 #include "core/css/parser/CSSInputStream.h"

	35 #include "core/html/parser/HTMLParserIdioms.h"

	36 #include "wtf/unicode/CharacterNames.h"

	37 #include <cfloat>

	38

	39 namespace WebCore {

	40

	41 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point

	42 static bool isNameStart(UChar c)

	43 {

	44 if (isASCIIAlpha(c))

	45 return true;

	46 if (c == '_')

	47 return true;

	48 return !isASCII(c);

	49 }

	50

	51 // http://www.w3.org/TR/css-syntax-3/#name-code-point

	52 static bool isNameChar(UChar c)

	53 {

	54 return isNameStart(c) \|\| isASCIIDigit(c) \|\| c == '-';

	55 }

	56

	57 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e

	58 static bool twoCharsAreValidEscape(UChar first, UChar second)

	59 {

	60 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ;

	61 }

	62

	63 MediaQueryTokenizer::MediaQueryTokenizer()

	64 {

	65 }

	66

	67 void MediaQueryTokenizer::reconsume(UChar c)

	68 {

	69 m_input->pushBack(c);

	70 }

	71

	72 UChar MediaQueryTokenizer::consume()

	73 {

	74 UChar current = m_input->currentInputChar();

	75 m_input->advance();

	76 return current;

	77 }

	78

	79 void MediaQueryTokenizer::consume(unsigned offset)

	80 {

	81 m_input->advance(offset);

	82 }

	83

	84 CSSToken MediaQueryTokenizer::whiteSpace(UChar cc)

	85 {

	86 // CSS Tokenization is currently lossy, but we could record

	87 // the exact whitespace instead of discarding it here.

	88 consumeUntilNotWhitespace();

	89 return CSSToken(WhitespaceToken);

	90 }

	91

	92 CSSToken MediaQueryTokenizer::leftParen(UChar cc)

	93 {

	94 return CSSToken(LeftParenToken);

	95 }

	96

	97 CSSToken MediaQueryTokenizer::rightParen(UChar cc)

	98 {

	99 return CSSToken(RightParenToken);

	100 }

	101

	102 CSSToken MediaQueryTokenizer::plusOrFullStop(UChar cc)

	103 {

	104 if (nextCharsAreNumber()) {

	105 reconsume(cc);

	106 return consumeNumericToken();

	107 }

	108 return CSSToken(DelimToken, cc);

	109 }

	110

	111 CSSToken MediaQueryTokenizer::comma(UChar cc)

	112 {

	113 return CSSToken(CommaToken);

	114 }

	115

	116 CSSToken MediaQueryTokenizer::hyphenMinus(UChar cc)

	117 {

	118 if (nextCharsAreNumber()) {

	119 reconsume(cc);

	120 return consumeNumericToken();

	121 }

	122 if (nextCharsAreIdentifier()) {

	123 reconsume(cc);

	124 return consumeIdentLikeToken();

	125 }

	126 return CSSToken(DelimToken, cc);

	127 }

	128

	129 CSSToken MediaQueryTokenizer::solidus(UChar cc)

	130 {

	131 return CSSToken(DelimToken, cc);

	132 }

	133

	134 CSSToken MediaQueryTokenizer::colon(UChar cc)

	135 {

	136 return CSSToken(ColonToken);

	137 }

	138

	139 CSSToken MediaQueryTokenizer::semiColon(UChar cc)

	140 {

	141 return CSSToken(SemicolonToken);

	142 }

	143

	144 CSSToken MediaQueryTokenizer::reverseSolidus(UChar cc)

	145 {

	146 if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) {

	147 reconsume(cc);

	148 return consumeIdentLikeToken();

	149 }

	150 return CSSToken(DelimToken, cc);

	151 }

	152

	153 CSSToken MediaQueryTokenizer::asciiDigit(UChar cc)

	154 {

	155 reconsume(cc);

	156 return consumeNumericToken();

	157 }

	158

	159 CSSToken MediaQueryTokenizer::nameStart(UChar cc)

	160 {

	161 reconsume(cc);

	162 return consumeIdentLikeToken();

	163 }

	164

	165 CSSToken MediaQueryTokenizer::endOfFile(UChar cc)

	166 {

	167 return CSSToken(EOFToken);

	168 }

	169

	170 void MediaQueryTokenizer::tokenize(String string, Vector<CSSToken>& outTokens)

	171 {

	172 MediaQueryTokenizer tokenizer;

	173 // According to the spec, we should perform preprocessing here.

	174 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing

	175 //

	176 // However, we can skip this step since:

	177 // * We're using HTML spaces (which accept \r and \f as a valid white space)

	178 // * Do not count white spaces

	179 // * consumeEscape replaces NULLs for replacement characters

	180

	181 CSSInputStream input(string);

	182 while (true) {

	183 outTokens.append(tokenizer.nextToken(input));

	184 if (outTokens.last().type() == EOFToken)

	185 return;

	186 }

	187 }

	188

	189 CSSToken MediaQueryTokenizer::nextToken(CSSInputStream& input)

	190 {

	191 // Unlike the HTMLTokenizer, the CSS Syntax spec is written

	192 // as a stateless, (fixed-size) look-ahead tokenizer.

	193 // We could move to the stateful model and instead create

	194 // states for all the "next 3 codepoints are X" cases.

	195 // State-machine tokenizers are easier to write to handle

	196 // incremental tokenization of partial sources.

	197 // However, for now we follow the spec exactly.

	198 m_input = &input;

	199 UChar cc = consume();

	200 CodePoint codePointFunc = 0;

	201

	202 if (isASCII(cc)) {

	203 ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM);

	204 codePointFunc = getCodePoints()->codePoints[cc];

	205 } else {

	206 codePointFunc = &MediaQueryTokenizer::nameStart;

	207 }

	208

	209 if (codePointFunc)

	210 return ((this)->*(codePointFunc))(cc);

	211

	212 return CSSToken(DelimToken, cc);

	213 }

	214

	215 // This method merges the following spec sections for efficiency

	216 // http://www.w3.org/TR/css3-syntax/#consume-a-number

	217 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number

	218 CSSToken MediaQueryTokenizer::consumeNumber()

	219 {

	220 ASSERT(nextCharsAreNumber());

	221 NumericValueType type = IntegerValueType;

	222 double value = 0;

	223 int sign = 1;

	224 unsigned peekOffset = 0;

	225 int exponentSign = 1;

	226 unsigned exponentStartPos = 0;

	227 unsigned exponentEndPos = 0;

	228 unsigned fractionStartPos = 0;

	229 unsigned fractionEndPos = 0;

	230 unsigned long long integerPart;

	231 unsigned long long fractionPart;

	232 unsigned fractionDigits;

	233 unsigned long long exponentPart;

	234 if (m_input->currentInputChar() == '+') {

	235 ++peekOffset;

	236 } else if (m_input->peek(peekOffset) == '-') {

	237 sign = -1;

	238 ++peekOffset;

	239 }

	240 unsigned intStartPos = peekOffset;

	241 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);

	242 unsigned intEndPos = peekOffset;

	243 if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(++peekOff set))) {

	244 fractionStartPos = peekOffset;

	245 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);

	246 fractionEndPos = peekOffset;

	247 }

	248 if ((m_input->peek(peekOffset) == 'E' \|\| m_input->peek(peekOffset) == 'e')) {

	249 int peekOffsetBeforeExponent = peekOffset;

	250 ++peekOffset;

	251 if (m_input->peek(peekOffset) == '+') {

	252 ++peekOffset;

	253 } else if (m_input->peek(peekOffset) =='-') {

	254 exponentSign = -1;

	255 ++peekOffset;

	256 }

	257 exponentStartPos = peekOffset;

	258 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);

	259 exponentEndPos = peekOffset;

	260 if (exponentEndPos == exponentStartPos)

	261 peekOffset = peekOffsetBeforeExponent;

	262 }

	263 integerPart = m_input->getNumber(intStartPos, intEndPos);

	264 fractionDigits = fractionEndPos - fractionStartPos;

	265 unsigned floatingFractionEndPos = fractionEndPos;

	266 if (fractionDigits > DBL_DIG) {

	267 // Limit the number of fraction digits, to avoid double (and fractionPar t) from overflowing

	268 fractionDigits = DBL_DIG;

	269 floatingFractionEndPos = fractionStartPos + DBL_DIG;

	270 }

	271 fractionPart = m_input->getNumber(fractionStartPos, floatingFractionEndPos);

	272 exponentPart = m_input->getNumber(exponentStartPos, exponentEndPos);

	273 double fractionDivisor = pow((double)10.0, (double)(fractionDigits));

	274 double exponent = pow(10, (float)exponentSign * (double)exponentPart);

	275 value = (double)sign * ((double)integerPart + (double)fractionPart / fractio nDivisor) * exponent;

	276

	277 m_input->advance(peekOffset);

	278 if (fractionDigits > 0)

	279 type = NumberValueType;

	280

	281 return CSSToken(NumberToken, value, type);

	282 }

	283

	284 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token

	285 CSSToken MediaQueryTokenizer::consumeNumericToken()

	286 {

	287 CSSToken token = consumeNumber();

	288 if (nextCharsAreIdentifier())

	289 token.convertToDimensionWithUnit(consumeName());

	290 else if (consumeIfNext('%'))

	291 token.convertToPercentage();

	292 return token;

	293 }

	294

	295 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token

	296 CSSToken MediaQueryTokenizer::consumeIdentLikeToken()

	297 {

	298 String name = consumeName();

	299 if (consumeIfNext('('))

	300 return CSSToken(FunctionToken, name);

	301 return CSSToken(IdentToken, name);

	302 }

	303

	304 void MediaQueryTokenizer::consumeUntilNotWhitespace()

	305 {

	306 // Using HTML space here rather than CSS space since we don't do preprocessi ng

	307 while (isHTMLSpace<UChar>(m_input->currentInputChar()))

	308 consume();

	309 }

	310

	311 bool MediaQueryTokenizer::consumeIfNext(UChar character)

	312 {

	313 if (m_input->currentInputChar() == character) {

	314 consume();

	315 return true;

	316 }

	317 return false;

	318 }

	319

	320 // http://www.w3.org/TR/css3-syntax/#consume-a-name

	321 String MediaQueryTokenizer::consumeName()

	322 {

	323 // FIXME: Is this as efficient as it can be?

	324 // The possibility of escape chars mandates a copy AFAICT.

	325 Vector<UChar> result;

	326 while (true) {

	327 if (isNameChar(m_input->currentInputChar())) {

	328 result.append(consume());

	329 continue;

	330 }

	331 if (nextTwoCharsAreValidEscape()) {

	332 // "consume()" fixes a spec bug.

	333 // The first code point should be consumed before consuming the esca ped code point.

	334 consume();

	335 result.append(consumeEscape());

	336 continue;

	337 }

	338 return String(result);

	339 }

	340 }

	341

	342 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point

	343 UChar MediaQueryTokenizer::consumeEscape()

	344 {

	345 UChar cc = consume();

	346 ASSERT(cc != '\n');

	347 if (isASCIIHexDigit(cc)) {

	348 unsigned consumedHexDigits = 1;

	349 String hexChars;

	350 do {

	351 hexChars.append(cc);

	352 cc = consume();

	353 consumedHexDigits++;

	354 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc));

	355 bool ok = false;

	356 UChar codePoint = hexChars.toUIntStrict(&ok, 16);

	357 if (!ok)

	358 return WTF::Unicode::replacementCharacter;

	359 return codePoint;

	360 }

	361

	362 // Replaces NULLs with replacement characters, since we do not perform prepr ocessing

	363 if (cc == kEndOfFileMarker)

	364 return WTF::Unicode::replacementCharacter;

	365 return cc;

	366 }

	367

	368 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()

	369 {

	370 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));

	371 }

	372

	373 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number

	374 bool MediaQueryTokenizer::nextCharsAreNumber()

	375 {

	376 UChar first = m_input->currentInputChar();

	377 UChar second = m_input->peek(1);

	378 if (isASCIIDigit(first))

	379 return true;

	380 if (first == '+' \|\| first == '-')

	381 return ((isASCIIDigit(second)) \|\| (second == '.' && isASCIIDigit(m_input ->peek(2))));

	382 if (first =='.')

	383 return (isASCIIDigit(second));

	384 return false;

	385 }

	386

	387 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier

	388 bool MediaQueryTokenizer::nextCharsAreIdentifier()

	389 {

	390 UChar firstChar = m_input->currentInputChar();

	391 if (isNameStart(firstChar) \|\| nextTwoCharsAreValidEscape())

	392 return true;

	393

	394 if (firstChar == '-') {

	395 if (isNameStart(m_input->peek(1)))

	396 return true;

	397 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));

	398 }

	399

	400 return false;

	401 }

	402

	403 } // namespace WebCore

OLD	NEW

« Source/core/css/MediaQueryExp.cpp ('K') | « Source/core/css/parser/MediaQueryTokenizer.h ('k') | Source/core/css/parser/MediaQueryTokenizerTest.cpp » ('j') | no next file with comments »