Source/core/css/parser/MediaQueryTokenizer.cpp - Issue 171383002: A thread-safe Media Query Parser

Side by Side Diff: Source/core/css/parser/MediaQueryTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Moar rebase Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2014 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "config.h"

	6 #include "core/css/parser/MediaQueryTokenizer.h"

	7

	8 #include "core/css/parser/MediaQueryInputStream.h"

	9 #include "core/html/parser/HTMLParserIdioms.h"

	10 #include "wtf/unicode/CharacterNames.h"

	11

	12 namespace WebCore {

	13

	14 const unsigned codePointsNumber = SCHAR_MAX;

	15

	16 class MediaQueryTokenizer::CodePoints {

	17 public:

	18 MediaQueryTokenizer::CodePoint codePoints[codePointsNumber];

	19

	20 // FIXME: Move the codePoint array to be a static one, generated by build sc ripts

	21 CodePoints()

	22 {

	23 memset(codePoints, 0, codePointsNumber);

	24 codePoints['\n'] = &MediaQueryTokenizer::whiteSpace;

	25 codePoints['\r'] = &MediaQueryTokenizer::whiteSpace;

	26 codePoints['\t'] = &MediaQueryTokenizer::whiteSpace;

	27 codePoints[' '] = &MediaQueryTokenizer::whiteSpace;

	28 codePoints['\f'] = &MediaQueryTokenizer::whiteSpace;

	29 codePoints['('] = &MediaQueryTokenizer::leftParenthesis;

	30 codePoints[')'] = &MediaQueryTokenizer::rightParenthesis;

	31 codePoints['+'] = &MediaQueryTokenizer::plusOrFullStop;

	32 codePoints['.'] = &MediaQueryTokenizer::plusOrFullStop;

	33 codePoints[','] = &MediaQueryTokenizer::comma;

	34 codePoints['-'] = &MediaQueryTokenizer::hyphenMinus;

	35 codePoints['/'] = &MediaQueryTokenizer::solidus;

	36 codePoints[':'] = &MediaQueryTokenizer::colon;

	37 codePoints[';'] = &MediaQueryTokenizer::semiColon;

	38 codePoints['\\'] = &MediaQueryTokenizer::reverseSolidus;

	39 for (unsigned char digit = '0'; digit <= '9'; ++digit)

	40 codePoints[digit] = &MediaQueryTokenizer::asciiDigit;

	41 for (unsigned char alpha = 'a'; alpha <= 'z'; ++alpha)

	42 codePoints[alpha] = &MediaQueryTokenizer::nameStart;

	43 for (unsigned char alpha = 'A'; alpha <= 'Z'; ++alpha)

	44 codePoints[alpha] = &MediaQueryTokenizer::nameStart;

	45 codePoints['_'] = &MediaQueryTokenizer::nameStart;

	46 codePoints[kEndOfFileMarker] = &MediaQueryTokenizer::endOfFile;

	47 }

	48 };

	49

	50 MediaQueryTokenizer::CodePoints* MediaQueryTokenizer::codePoints()

	51 {

	52 static CodePoints codePoints;

	53 return &codePoints;

	54 }

	55

	56 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point

	57 static bool isNameStart(UChar c)

	58 {

	59 if (isASCIIAlpha(c))

	60 return true;

	61 if (c == '_')

	62 return true;

	63 return !isASCII(c);

	64 }

	65

	66 // http://www.w3.org/TR/css-syntax-3/#name-code-point

	67 static bool isNameChar(UChar c)

	68 {

	69 return isNameStart(c) \|\| isASCIIDigit(c) \|\| c == '-';

	70 }

	71

	72 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e

	73 static bool twoCharsAreValidEscape(UChar first, UChar second)

	74 {

	75 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ;

	76 }

	77

	78 MediaQueryTokenizer::MediaQueryTokenizer(MediaQueryInputStream& inputStream)

	79 : m_input(inputStream)

	80 {

	81 }

	82

	83 void MediaQueryTokenizer::reconsume(UChar c)

	84 {

	85 m_input.pushBack(c);

	86 }

	87

	88 UChar MediaQueryTokenizer::consume()

	89 {

	90 UChar current = m_input.currentInputChar();

	91 m_input.advance();

	92 return current;

	93 }

	94

	95 void MediaQueryTokenizer::consume(unsigned offset)

	96 {

	97 m_input.advance(offset);

	98 }

	99

	100 MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc)

	101 {

	102 // CSS Tokenization is currently lossy, but we could record

	103 // the exact whitespace instead of discarding it here.

	104 consumeUntilNonWhitespace();

	105 return MediaQueryToken(WhitespaceToken);

	106 }

	107

	108 MediaQueryToken MediaQueryTokenizer::leftParenthesis(UChar cc)

	109 {

	110 return MediaQueryToken(LeftParenthesisToken);

	111 }

	112

	113 MediaQueryToken MediaQueryTokenizer::rightParenthesis(UChar cc)

	114 {

	115 return MediaQueryToken(RightParenthesisToken);

	116 }

	117

	118 MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc)

	119 {

	120 if (nextCharsAreNumber()) {

	121 reconsume(cc);

	122 return consumeNumericToken();

	123 }

	124 return MediaQueryToken(DelimiterToken, cc);

	125 }

	126

	127 MediaQueryToken MediaQueryTokenizer::comma(UChar cc)

	128 {

	129 return MediaQueryToken(CommaToken);

	130 }

	131

	132 MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc)

	133 {

	134 if (nextCharsAreNumber()) {

	135 reconsume(cc);

	136 return consumeNumericToken();

	137 }

	138 if (nextCharsAreIdentifier()) {

	139 reconsume(cc);

	140 return consumeIdentLikeToken();

	141 }

	142 return MediaQueryToken(DelimiterToken, cc);

	143 }

	144

	145 MediaQueryToken MediaQueryTokenizer::solidus(UChar cc)

	146 {

	147 return MediaQueryToken(DelimiterToken, cc);

	148 }

	149

	150 MediaQueryToken MediaQueryTokenizer::colon(UChar cc)

	151 {

	152 return MediaQueryToken(ColonToken);

	153 }

	154

	155 MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc)

	156 {

	157 return MediaQueryToken(SemicolonToken);

	158 }

	159

	160 MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc)

	161 {

	162 if (twoCharsAreValidEscape(cc, m_input.currentInputChar())) {

	163 reconsume(cc);

	164 return consumeIdentLikeToken();

	165 }

	166 return MediaQueryToken(DelimiterToken, cc);

	167 }

	168

	169 MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc)

	170 {

	171 reconsume(cc);

	172 return consumeNumericToken();

	173 }

	174

	175 MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc)

	176 {

	177 reconsume(cc);

	178 return consumeIdentLikeToken();

	179 }

	180

	181 MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc)

	182 {

	183 return MediaQueryToken(EOFToken);

	184 }

	185

	186 void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTo kens)

	187 {

	188 // According to the spec, we should perform preprocessing here.

	189 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing

	190 //

	191 // However, we can skip this step since:

	192 // * We're using HTML spaces (which accept \r and \f as a valid white space)

	193 // * Do not count white spaces

	194 // * consumeEscape replaces NULLs for replacement characters

	195

	196 MediaQueryInputStream input(string);

	197 MediaQueryTokenizer tokenizer(input);

	198 while (true) {

	199 outTokens.append(tokenizer.nextToken());

	200 if (outTokens.last().type() == EOFToken)

	201 return;

	202 }

	203 }

	204

	205 MediaQueryToken MediaQueryTokenizer::nextToken()

	206 {

	207 // Unlike the HTMLTokenizer, the CSS Syntax spec is written

	208 // as a stateless, (fixed-size) look-ahead tokenizer.

	209 // We could move to the stateful model and instead create

	210 // states for all the "next 3 codepoints are X" cases.

	211 // State-machine tokenizers are easier to write to handle

	212 // incremental tokenization of partial sources.

	213 // However, for now we follow the spec exactly.

	214 UChar cc = consume();

	215 CodePoint codePointFunc = 0;

	216

	217 if (isASCII(cc)) {

	218 ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber);

	219 codePointFunc = codePoints()->codePoints[cc];

	220 } else {

	221 codePointFunc = &MediaQueryTokenizer::nameStart;

	222 }

	223

	224 if (codePointFunc)

	225 return ((this)->*(codePointFunc))(cc);

	226

	227 return MediaQueryToken(DelimiterToken, cc);

	228 }

	229

	230 static int getSign(MediaQueryInputStream& input, unsigned& offset)

	231 {

	232 int sign = 1;

	233 if (input.currentInputChar() == '+') {

	234 ++offset;

	235 } else if (input.peek(offset) == '-') {

	236 sign = -1;

	237 ++offset;

	238 }

	239 return sign;

	240 }

	241

	242 static unsigned long long getInteger(MediaQueryInputStream& input, unsigned& off set)

	243 {

	244 unsigned intStartPos = offset;

	245 offset = input.skipWhilePredicate<isASCIIDigit>(offset);

	246 unsigned intEndPos = offset;

	247 return input.getUInt(intStartPos, intEndPos);

	248 }

	249

	250 static double getFraction(MediaQueryInputStream& input, unsigned& offset, unsign ed& digitsNumber)

	251 {

	252 unsigned fractionStartPos = 0;

	253 unsigned fractionEndPos = 0;

	254 if (input.peek(offset) == '.' && isASCIIDigit(input.peek(++offset))) {

	255 fractionStartPos = offset - 1;

	256 offset = input.skipWhilePredicate<isASCIIDigit>(offset);

	257 fractionEndPos = offset;

	258 }

	259 digitsNumber = fractionEndPos- fractionStartPos;

	260 return input.getDouble(fractionStartPos, fractionEndPos);

	261 }

	262

	263 static unsigned long long getExponent(MediaQueryInputStream& input, unsigned& of fset, int sign)

	264 {

	265 unsigned exponentStartPos = 0;

	266 unsigned exponentEndPos = 0;

	267 if ((input.peek(offset) == 'E' \|\| input.peek(offset) == 'e')) {

	268 int offsetBeforeExponent = offset;

	269 ++offset;

	270 if (input.peek(offset) == '+') {

	271 ++offset;

	272 } else if (input.peek(offset) =='-') {

	273 sign = -1;

	274 ++offset;

	275 }

	276 exponentStartPos = offset;

	277 offset = input.skipWhilePredicate<isASCIIDigit>(offset);

	278 exponentEndPos = offset;

	279 if (exponentEndPos == exponentStartPos)

	280 offset = offsetBeforeExponent;

	281 }

	282 return input.getUInt(exponentStartPos, exponentEndPos);

	283 }

	284

	285 // This method merges the following spec sections for efficiency

	286 // http://www.w3.org/TR/css3-syntax/#consume-a-number

	287 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number

	288 MediaQueryToken MediaQueryTokenizer::consumeNumber()

	289 {

	290 ASSERT(nextCharsAreNumber());

	291 NumericValueType type = IntegerValueType;

	292 double value = 0;

	293 unsigned offset = 0;

	294 int exponentSign = 1;

	295 unsigned fractionDigits;

	296 int sign = getSign(m_input, offset);

	297 unsigned long long integerPart = getInteger(m_input, offset);

	298 double fractionPart = getFraction(m_input, offset, fractionDigits);

	299 unsigned long long exponentPart = getExponent(m_input, offset, exponentSign) ;

	300 double exponent = pow(10, (float)exponentSign * (double)exponentPart);

	301 value = (double)sign * ((double)integerPart + fractionPart) * exponent;

	302

	303 m_input.advance(offset);

	304 if (fractionDigits > 0)

	305 type = NumberValueType;

	306

	307 return MediaQueryToken(NumberToken, value, type);

	308 }

	309

	310 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token

	311 MediaQueryToken MediaQueryTokenizer::consumeNumericToken()

	312 {

	313 MediaQueryToken token = consumeNumber();

	314 if (nextCharsAreIdentifier())

	315 token.convertToDimensionWithUnit(consumeName());

	316 else if (consumeIfNext('%'))

	317 token.convertToPercentage();

	318 return token;

	319 }

	320

	321 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token

	322 MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken()

	323 {

	324 String name = consumeName();

	325 if (consumeIfNext('('))

	326 return MediaQueryToken(FunctionToken, name);

	327 return MediaQueryToken(IdentToken, name);

	328 }

	329

	330 void MediaQueryTokenizer::consumeUntilNonWhitespace()

	331 {

	332 // Using HTML space here rather than CSS space since we don't do preprocessi ng

	333 while (isHTMLSpace<UChar>(m_input.currentInputChar()))

	334 consume();

	335 }

	336

	337 bool MediaQueryTokenizer::consumeIfNext(UChar character)

	338 {

	339 if (m_input.currentInputChar() == character) {

	340 consume();

	341 return true;

	342 }

	343 return false;

	344 }

	345

	346 // http://www.w3.org/TR/css3-syntax/#consume-a-name

	347 String MediaQueryTokenizer::consumeName()

	348 {

	349 // FIXME: Is this as efficient as it can be?

	350 // The possibility of escape chars mandates a copy AFAICT.

	351 Vector<UChar> result;

	352 while (true) {

	353 if (isNameChar(m_input.currentInputChar())) {

	354 result.append(consume());

	355 continue;

	356 }

	357 if (nextTwoCharsAreValidEscape()) {

	358 // "consume()" fixes a spec bug.

	359 // The first code point should be consumed before consuming the esca ped code point.

	360 consume();

	361 result.append(consumeEscape());

	362 continue;

	363 }

	364 return String(result);

	365 }

	366 }

	367

	368 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point

	369 UChar MediaQueryTokenizer::consumeEscape()

	370 {

	371 UChar cc = consume();

	372 ASSERT(cc != '\n');

	373 if (isASCIIHexDigit(cc)) {

	374 unsigned consumedHexDigits = 1;

	375 String hexChars;

	376 do {

	377 hexChars.append(cc);

	378 cc = consume();

	379 consumedHexDigits++;

	380 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc));

	381 bool ok = false;

	382 UChar codePoint = hexChars.toUIntStrict(&ok, 16);

	383 if (!ok)

	384 return WTF::Unicode::replacementCharacter;

	385 return codePoint;

	386 }

	387

	388 // Replaces NULLs with replacement characters, since we do not perform prepr ocessing

	389 if (cc == kEndOfFileMarker)

	390 return WTF::Unicode::replacementCharacter;

	391 return cc;

	392 }

	393

	394 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()

	395 {

	396 if (m_input.leftChars() < 2)

	397 return false;

	398 return twoCharsAreValidEscape(m_input.peek(1), m_input.peek(2));

	399 }

	400

	401 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number

	402 bool MediaQueryTokenizer::nextCharsAreNumber()

	403 {

	404 UChar first = m_input.currentInputChar();

	405 UChar second = m_input.peek(1);

	406 if (isASCIIDigit(first))

	407 return true;

	408 if (first == '+' \|\| first == '-')

	409 return ((isASCIIDigit(second)) \|\| (second == '.' && isASCIIDigit(m_input .peek(2))));

	410 if (first =='.')

	411 return (isASCIIDigit(second));

	412 return false;

	413 }

	414

	415 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier

	416 bool MediaQueryTokenizer::nextCharsAreIdentifier()

	417 {

	418 UChar firstChar = m_input.currentInputChar();

	419 if (isNameStart(firstChar) \|\| nextTwoCharsAreValidEscape())

	420 return true;

	421

	422 if (firstChar == '-') {

	423 if (isNameStart(m_input.peek(1)))

	424 return true;

	425 return nextTwoCharsAreValidEscape();

	426 }

	427

	428 return false;

	429 }

	430

	431 } // namespace WebCore

OLD	NEW

« Source/core/css/CSSPrimitiveValue.cpp ('K') | « Source/core/css/parser/MediaQueryTokenizer.h ('k') | Source/core/css/parser/MediaQueryTokenizerTest.cpp » ('j') | no next file with comments »