Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(364)

Side by Side Diff: Source/core/css/parser/MediaQueryTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Moar rebase Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "config.h"
6 #include "core/css/parser/MediaQueryTokenizer.h"
7
8 #include "core/css/parser/MediaQueryInputStream.h"
9 #include "core/html/parser/HTMLParserIdioms.h"
10 #include "wtf/unicode/CharacterNames.h"
11
12 namespace WebCore {
13
// Number of slots in the ASCII dispatch table. ASCII spans 0..SCHAR_MAX
// inclusive, so the table needs SCHAR_MAX + 1 entries; with only SCHAR_MAX
// entries, code point 0x7F (DEL) would index one past the end.
const unsigned codePointsNumber = SCHAR_MAX + 1;
15
16 class MediaQueryTokenizer::CodePoints {
17 public:
18 MediaQueryTokenizer::CodePoint codePoints[codePointsNumber];
19
20 // FIXME: Move the codePoint array to be a static one, generated by build sc ripts
21 CodePoints()
22 {
23 memset(codePoints, 0, codePointsNumber);
24 codePoints['\n'] = &MediaQueryTokenizer::whiteSpace;
25 codePoints['\r'] = &MediaQueryTokenizer::whiteSpace;
26 codePoints['\t'] = &MediaQueryTokenizer::whiteSpace;
27 codePoints[' '] = &MediaQueryTokenizer::whiteSpace;
28 codePoints['\f'] = &MediaQueryTokenizer::whiteSpace;
29 codePoints['('] = &MediaQueryTokenizer::leftParenthesis;
30 codePoints[')'] = &MediaQueryTokenizer::rightParenthesis;
31 codePoints['+'] = &MediaQueryTokenizer::plusOrFullStop;
32 codePoints['.'] = &MediaQueryTokenizer::plusOrFullStop;
33 codePoints[','] = &MediaQueryTokenizer::comma;
34 codePoints['-'] = &MediaQueryTokenizer::hyphenMinus;
35 codePoints['/'] = &MediaQueryTokenizer::solidus;
36 codePoints[':'] = &MediaQueryTokenizer::colon;
37 codePoints[';'] = &MediaQueryTokenizer::semiColon;
38 codePoints['\\'] = &MediaQueryTokenizer::reverseSolidus;
39 for (unsigned char digit = '0'; digit <= '9'; ++digit)
40 codePoints[digit] = &MediaQueryTokenizer::asciiDigit;
41 for (unsigned char alpha = 'a'; alpha <= 'z'; ++alpha)
42 codePoints[alpha] = &MediaQueryTokenizer::nameStart;
43 for (unsigned char alpha = 'A'; alpha <= 'Z'; ++alpha)
44 codePoints[alpha] = &MediaQueryTokenizer::nameStart;
45 codePoints['_'] = &MediaQueryTokenizer::nameStart;
46 codePoints[kEndOfFileMarker] = &MediaQueryTokenizer::endOfFile;
47 }
48 };
49
50 MediaQueryTokenizer::CodePoints* MediaQueryTokenizer::codePoints()
51 {
52 static CodePoints codePoints;
53 return &codePoints;
54 }
55
56 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point
57 static bool isNameStart(UChar c)
58 {
59 if (isASCIIAlpha(c))
60 return true;
61 if (c == '_')
62 return true;
63 return !isASCII(c);
64 }
65
66 // http://www.w3.org/TR/css-syntax-3/#name-code-point
67 static bool isNameChar(UChar c)
68 {
69 return isNameStart(c) || isASCIIDigit(c) || c == '-';
70 }
71
72 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e
73 static bool twoCharsAreValidEscape(UChar first, UChar second)
74 {
75 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ;
76 }
77
78 MediaQueryTokenizer::MediaQueryTokenizer(MediaQueryInputStream& inputStream)
79 : m_input(inputStream)
80 {
81 }
82
83 void MediaQueryTokenizer::reconsume(UChar c)
84 {
85 m_input.pushBack(c);
86 }
87
88 UChar MediaQueryTokenizer::consume()
89 {
90 UChar current = m_input.currentInputChar();
91 m_input.advance();
92 return current;
93 }
94
95 void MediaQueryTokenizer::consume(unsigned offset)
96 {
97 m_input.advance(offset);
98 }
99
100 MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc)
101 {
102 // CSS Tokenization is currently lossy, but we could record
103 // the exact whitespace instead of discarding it here.
104 consumeUntilNonWhitespace();
105 return MediaQueryToken(WhitespaceToken);
106 }
107
108 MediaQueryToken MediaQueryTokenizer::leftParenthesis(UChar cc)
109 {
110 return MediaQueryToken(LeftParenthesisToken);
111 }
112
113 MediaQueryToken MediaQueryTokenizer::rightParenthesis(UChar cc)
114 {
115 return MediaQueryToken(RightParenthesisToken);
116 }
117
118 MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc)
119 {
120 if (nextCharsAreNumber()) {
121 reconsume(cc);
122 return consumeNumericToken();
123 }
124 return MediaQueryToken(DelimiterToken, cc);
125 }
126
127 MediaQueryToken MediaQueryTokenizer::comma(UChar cc)
128 {
129 return MediaQueryToken(CommaToken);
130 }
131
132 MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc)
133 {
134 if (nextCharsAreNumber()) {
135 reconsume(cc);
136 return consumeNumericToken();
137 }
138 if (nextCharsAreIdentifier()) {
139 reconsume(cc);
140 return consumeIdentLikeToken();
141 }
142 return MediaQueryToken(DelimiterToken, cc);
143 }
144
145 MediaQueryToken MediaQueryTokenizer::solidus(UChar cc)
146 {
147 return MediaQueryToken(DelimiterToken, cc);
148 }
149
150 MediaQueryToken MediaQueryTokenizer::colon(UChar cc)
151 {
152 return MediaQueryToken(ColonToken);
153 }
154
155 MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc)
156 {
157 return MediaQueryToken(SemicolonToken);
158 }
159
160 MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc)
161 {
162 if (twoCharsAreValidEscape(cc, m_input.currentInputChar())) {
163 reconsume(cc);
164 return consumeIdentLikeToken();
165 }
166 return MediaQueryToken(DelimiterToken, cc);
167 }
168
169 MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc)
170 {
171 reconsume(cc);
172 return consumeNumericToken();
173 }
174
175 MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc)
176 {
177 reconsume(cc);
178 return consumeIdentLikeToken();
179 }
180
181 MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc)
182 {
183 return MediaQueryToken(EOFToken);
184 }
185
186 void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTo kens)
187 {
188 // According to the spec, we should perform preprocessing here.
189 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing
190 //
191 // However, we can skip this step since:
192 // * We're using HTML spaces (which accept \r and \f as a valid white space)
193 // * Do not count white spaces
194 // * consumeEscape replaces NULLs for replacement characters
195
196 MediaQueryInputStream input(string);
197 MediaQueryTokenizer tokenizer(input);
198 while (true) {
199 outTokens.append(tokenizer.nextToken());
200 if (outTokens.last().type() == EOFToken)
201 return;
202 }
203 }
204
205 MediaQueryToken MediaQueryTokenizer::nextToken()
206 {
207 // Unlike the HTMLTokenizer, the CSS Syntax spec is written
208 // as a stateless, (fixed-size) look-ahead tokenizer.
209 // We could move to the stateful model and instead create
210 // states for all the "next 3 codepoints are X" cases.
211 // State-machine tokenizers are easier to write to handle
212 // incremental tokenization of partial sources.
213 // However, for now we follow the spec exactly.
214 UChar cc = consume();
215 CodePoint codePointFunc = 0;
216
217 if (isASCII(cc)) {
218 ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber);
219 codePointFunc = codePoints()->codePoints[cc];
220 } else {
221 codePointFunc = &MediaQueryTokenizer::nameStart;
222 }
223
224 if (codePointFunc)
225 return ((this)->*(codePointFunc))(cc);
226
227 return MediaQueryToken(DelimiterToken, cc);
228 }
229
230 static int getSign(MediaQueryInputStream& input, unsigned& offset)
231 {
232 int sign = 1;
233 if (input.currentInputChar() == '+') {
234 ++offset;
235 } else if (input.peek(offset) == '-') {
236 sign = -1;
237 ++offset;
238 }
239 return sign;
240 }
241
242 static unsigned long long getInteger(MediaQueryInputStream& input, unsigned& off set)
243 {
244 unsigned intStartPos = offset;
245 offset = input.skipWhilePredicate<isASCIIDigit>(offset);
246 unsigned intEndPos = offset;
247 return input.getUInt(intStartPos, intEndPos);
248 }
249
250 static double getFraction(MediaQueryInputStream& input, unsigned& offset, unsign ed& digitsNumber)
251 {
252 unsigned fractionStartPos = 0;
253 unsigned fractionEndPos = 0;
254 if (input.peek(offset) == '.' && isASCIIDigit(input.peek(++offset))) {
255 fractionStartPos = offset - 1;
256 offset = input.skipWhilePredicate<isASCIIDigit>(offset);
257 fractionEndPos = offset;
258 }
259 digitsNumber = fractionEndPos- fractionStartPos;
260 return input.getDouble(fractionStartPos, fractionEndPos);
261 }
262
263 static unsigned long long getExponent(MediaQueryInputStream& input, unsigned& of fset, int sign)
264 {
265 unsigned exponentStartPos = 0;
266 unsigned exponentEndPos = 0;
267 if ((input.peek(offset) == 'E' || input.peek(offset) == 'e')) {
268 int offsetBeforeExponent = offset;
269 ++offset;
270 if (input.peek(offset) == '+') {
271 ++offset;
272 } else if (input.peek(offset) =='-') {
273 sign = -1;
274 ++offset;
275 }
276 exponentStartPos = offset;
277 offset = input.skipWhilePredicate<isASCIIDigit>(offset);
278 exponentEndPos = offset;
279 if (exponentEndPos == exponentStartPos)
280 offset = offsetBeforeExponent;
281 }
282 return input.getUInt(exponentStartPos, exponentEndPos);
283 }
284
285 // This method merges the following spec sections for efficiency
286 // http://www.w3.org/TR/css3-syntax/#consume-a-number
287 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number
288 MediaQueryToken MediaQueryTokenizer::consumeNumber()
289 {
290 ASSERT(nextCharsAreNumber());
291 NumericValueType type = IntegerValueType;
292 double value = 0;
293 unsigned offset = 0;
294 int exponentSign = 1;
295 unsigned fractionDigits;
296 int sign = getSign(m_input, offset);
297 unsigned long long integerPart = getInteger(m_input, offset);
298 double fractionPart = getFraction(m_input, offset, fractionDigits);
299 unsigned long long exponentPart = getExponent(m_input, offset, exponentSign) ;
300 double exponent = pow(10, (float)exponentSign * (double)exponentPart);
301 value = (double)sign * ((double)integerPart + fractionPart) * exponent;
302
303 m_input.advance(offset);
304 if (fractionDigits > 0)
305 type = NumberValueType;
306
307 return MediaQueryToken(NumberToken, value, type);
308 }
309
310 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token
311 MediaQueryToken MediaQueryTokenizer::consumeNumericToken()
312 {
313 MediaQueryToken token = consumeNumber();
314 if (nextCharsAreIdentifier())
315 token.convertToDimensionWithUnit(consumeName());
316 else if (consumeIfNext('%'))
317 token.convertToPercentage();
318 return token;
319 }
320
321 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token
322 MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken()
323 {
324 String name = consumeName();
325 if (consumeIfNext('('))
326 return MediaQueryToken(FunctionToken, name);
327 return MediaQueryToken(IdentToken, name);
328 }
329
330 void MediaQueryTokenizer::consumeUntilNonWhitespace()
331 {
332 // Using HTML space here rather than CSS space since we don't do preprocessi ng
333 while (isHTMLSpace<UChar>(m_input.currentInputChar()))
334 consume();
335 }
336
337 bool MediaQueryTokenizer::consumeIfNext(UChar character)
338 {
339 if (m_input.currentInputChar() == character) {
340 consume();
341 return true;
342 }
343 return false;
344 }
345
346 // http://www.w3.org/TR/css3-syntax/#consume-a-name
347 String MediaQueryTokenizer::consumeName()
348 {
349 // FIXME: Is this as efficient as it can be?
350 // The possibility of escape chars mandates a copy AFAICT.
351 Vector<UChar> result;
352 while (true) {
353 if (isNameChar(m_input.currentInputChar())) {
354 result.append(consume());
355 continue;
356 }
357 if (nextTwoCharsAreValidEscape()) {
358 // "consume()" fixes a spec bug.
359 // The first code point should be consumed before consuming the esca ped code point.
360 consume();
361 result.append(consumeEscape());
362 continue;
363 }
364 return String(result);
365 }
366 }
367
368 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point
369 UChar MediaQueryTokenizer::consumeEscape()
370 {
371 UChar cc = consume();
372 ASSERT(cc != '\n');
373 if (isASCIIHexDigit(cc)) {
374 unsigned consumedHexDigits = 1;
375 String hexChars;
376 do {
377 hexChars.append(cc);
378 cc = consume();
379 consumedHexDigits++;
380 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc));
381 bool ok = false;
382 UChar codePoint = hexChars.toUIntStrict(&ok, 16);
383 if (!ok)
384 return WTF::Unicode::replacementCharacter;
385 return codePoint;
386 }
387
388 // Replaces NULLs with replacement characters, since we do not perform prepr ocessing
389 if (cc == kEndOfFileMarker)
390 return WTF::Unicode::replacementCharacter;
391 return cc;
392 }
393
394 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()
395 {
396 if (m_input.leftChars() < 2)
397 return false;
398 return twoCharsAreValidEscape(m_input.peek(1), m_input.peek(2));
399 }
400
401 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number
402 bool MediaQueryTokenizer::nextCharsAreNumber()
403 {
404 UChar first = m_input.currentInputChar();
405 UChar second = m_input.peek(1);
406 if (isASCIIDigit(first))
407 return true;
408 if (first == '+' || first == '-')
409 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input .peek(2))));
410 if (first =='.')
411 return (isASCIIDigit(second));
412 return false;
413 }
414
415 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier
416 bool MediaQueryTokenizer::nextCharsAreIdentifier()
417 {
418 UChar firstChar = m_input.currentInputChar();
419 if (isNameStart(firstChar) || nextTwoCharsAreValidEscape())
420 return true;
421
422 if (firstChar == '-') {
423 if (isNameStart(m_input.peek(1)))
424 return true;
425 return nextTwoCharsAreValidEscape();
426 }
427
428 return false;
429 }
430
431 } // namespace WebCore
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698