Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(204)

Side by Side Diff: Source/core/css/parser/MediaQueryTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Fixed gcc compile issues and debug asserts Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "config.h"
6 #include "core/css/parser/MediaQueryTokenizer.h"
7
8 #include "core/css/parser/MediaQueryInputStream.h"
9 #include "core/html/parser/HTMLParserIdioms.h"
10 #include "wtf/unicode/CharacterNames.h"
11
12 namespace WebCore {
13
14 const unsigned codePointsNumber = SCHAR_MAX;
15
16 class MediaQueryTokenizer::CodePoints {
17 public:
18 MediaQueryTokenizer::CodePoint codePoints[codePointsNumber];
19
20 CodePoints()
21 {
22 memset(codePoints, 0, codePointsNumber);
23 codePoints['\n'] = &MediaQueryTokenizer::whiteSpace;
24 codePoints['\r'] = &MediaQueryTokenizer::whiteSpace;
25 codePoints['\t'] = &MediaQueryTokenizer::whiteSpace;
26 codePoints[' '] = &MediaQueryTokenizer::whiteSpace;
27 codePoints['\f'] = &MediaQueryTokenizer::whiteSpace;
28 codePoints['('] = &MediaQueryTokenizer::leftParenthesis;
29 codePoints[')'] = &MediaQueryTokenizer::rightParenthesis;
30 codePoints['+'] = &MediaQueryTokenizer::plusOrFullStop;
31 codePoints['.'] = &MediaQueryTokenizer::plusOrFullStop;
32 codePoints[','] = &MediaQueryTokenizer::comma;
33 codePoints['-'] = &MediaQueryTokenizer::hyphenMinus;
34 codePoints['/'] = &MediaQueryTokenizer::solidus;
35 codePoints[':'] = &MediaQueryTokenizer::colon;
36 codePoints[';'] = &MediaQueryTokenizer::semiColon;
37 codePoints['\\'] = &MediaQueryTokenizer::reverseSolidus;
38 for (unsigned char digit = '0'; digit <= '9'; ++digit)
39 codePoints[digit] = &MediaQueryTokenizer::asciiDigit;
40 for (unsigned char alpha = 'a'; alpha <= 'z'; ++alpha)
41 codePoints[alpha] = &MediaQueryTokenizer::nameStart;
42 for (unsigned char alpha = 'A'; alpha <= 'Z'; ++alpha)
43 codePoints[alpha] = &MediaQueryTokenizer::nameStart;
44 codePoints['_'] = &MediaQueryTokenizer::nameStart;
45 codePoints[kEndOfFileMarker] = &MediaQueryTokenizer::endOfFile;
46 }
47 };
48
49 MediaQueryTokenizer::CodePoints* MediaQueryTokenizer::codePoints()
50 {
51 static CodePoints codePoints;
52 return &codePoints;
53 }
54
55 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point
56 static bool isNameStart(UChar c)
57 {
58 if (isASCIIAlpha(c))
59 return true;
60 if (c == '_')
61 return true;
62 return !isASCII(c);
63 }
64
65 // http://www.w3.org/TR/css-syntax-3/#name-code-point
66 static bool isNameChar(UChar c)
67 {
68 return isNameStart(c) || isASCIIDigit(c) || c == '-';
69 }
70
71 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e
72 static bool twoCharsAreValidEscape(UChar first, UChar second)
73 {
74 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ;
75 }
76
77 MediaQueryTokenizer::MediaQueryTokenizer()
78 {
79 }
80
81 void MediaQueryTokenizer::reconsume(UChar c)
82 {
83 m_input->pushBack(c);
84 }
85
86 UChar MediaQueryTokenizer::consume()
87 {
88 UChar current = m_input->currentInputChar();
89 m_input->advance();
90 return current;
91 }
92
93 void MediaQueryTokenizer::consume(unsigned offset)
94 {
95 m_input->advance(offset);
96 }
97
98 MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc)
99 {
100 // CSS Tokenization is currently lossy, but we could record
101 // the exact whitespace instead of discarding it here.
102 consumeUntilNonWhitespace();
103 return MediaQueryToken(WhitespaceToken);
104 }
105
106 MediaQueryToken MediaQueryTokenizer::leftParenthesis(UChar cc)
107 {
108 return MediaQueryToken(LeftParenthesisToken);
109 }
110
111 MediaQueryToken MediaQueryTokenizer::rightParenthesis(UChar cc)
112 {
113 return MediaQueryToken(RightParenthesisToken);
114 }
115
116 MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc)
117 {
118 if (nextCharsAreNumber()) {
119 reconsume(cc);
120 return consumeNumericToken();
121 }
122 return MediaQueryToken(DelimiterToken, cc);
123 }
124
125 MediaQueryToken MediaQueryTokenizer::comma(UChar cc)
126 {
127 return MediaQueryToken(CommaToken);
128 }
129
130 MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc)
131 {
132 if (nextCharsAreNumber()) {
133 reconsume(cc);
134 return consumeNumericToken();
135 }
136 if (nextCharsAreIdentifier()) {
137 reconsume(cc);
138 return consumeIdentLikeToken();
139 }
140 return MediaQueryToken(DelimiterToken, cc);
141 }
142
143 MediaQueryToken MediaQueryTokenizer::solidus(UChar cc)
144 {
145 return MediaQueryToken(DelimiterToken, cc);
146 }
147
148 MediaQueryToken MediaQueryTokenizer::colon(UChar cc)
149 {
150 return MediaQueryToken(ColonToken);
151 }
152
153 MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc)
154 {
155 return MediaQueryToken(SemicolonToken);
156 }
157
158 MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc)
159 {
160 if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) {
161 reconsume(cc);
162 return consumeIdentLikeToken();
163 }
164 return MediaQueryToken(DelimiterToken, cc);
165 }
166
167 MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc)
168 {
169 reconsume(cc);
170 return consumeNumericToken();
171 }
172
173 MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc)
174 {
175 reconsume(cc);
176 return consumeIdentLikeToken();
177 }
178
179 MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc)
180 {
181 return MediaQueryToken(EOFToken);
182 }
183
184 void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTo kens)
185 {
186 MediaQueryTokenizer tokenizer;
187 // According to the spec, we should perform preprocessing here.
188 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing
189 //
190 // However, we can skip this step since:
191 // * We're using HTML spaces (which accept \r and \f as a valid white space)
192 // * Do not count white spaces
193 // * consumeEscape replaces NULLs for replacement characters
194
195 MediaQueryInputStream input(string);
196 while (true) {
197 outTokens.append(tokenizer.nextToken(input));
198 if (outTokens.last().type() == EOFToken)
199 return;
200 }
201 }
202
203 MediaQueryToken MediaQueryTokenizer::nextToken(MediaQueryInputStream& input)
204 {
205 // Unlike the HTMLTokenizer, the CSS Syntax spec is written
206 // as a stateless, (fixed-size) look-ahead tokenizer.
207 // We could move to the stateful model and instead create
208 // states for all the "next 3 codepoints are X" cases.
209 // State-machine tokenizers are easier to write to handle
210 // incremental tokenization of partial sources.
211 // However, for now we follow the spec exactly.
212 m_input = &input;
213 UChar cc = consume();
214 CodePoint codePointFunc = 0;
215
216 if (isASCII(cc)) {
217 ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber);
218 codePointFunc = codePoints()->codePoints[cc];
219 } else {
220 codePointFunc = &MediaQueryTokenizer::nameStart;
221 }
222
223 if (codePointFunc)
224 return ((this)->*(codePointFunc))(cc);
225
226 return MediaQueryToken(DelimiterToken, cc);
227 }
228
229 // This method merges the following spec sections for efficiency
230 // http://www.w3.org/TR/css3-syntax/#consume-a-number
231 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number
232 MediaQueryToken MediaQueryTokenizer::consumeNumber()
eseidel 2014/03/13 17:58:45 This is a really long function and it might make s
233 {
234 ASSERT(nextCharsAreNumber());
235 NumericValueType type = IntegerValueType;
236 double value = 0;
237 int sign = 1;
238 unsigned peekOffset = 0;
239 int exponentSign = 1;
240 unsigned exponentStartPos = 0;
241 unsigned exponentEndPos = 0;
242 unsigned fractionStartPos = 0;
243 unsigned fractionEndPos = 0;
244 unsigned long long integerPart;
245 double fractionPart;
246 unsigned fractionDigits;
247 unsigned long long exponentPart;
248 if (m_input->currentInputChar() == '+') {
249 ++peekOffset;
250 } else if (m_input->peek(peekOffset) == '-') {
251 sign = -1;
252 ++peekOffset;
253 }
254 unsigned intStartPos = peekOffset;
255 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
256 unsigned intEndPos = peekOffset;
257 if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(++peekOff set))) {
258 fractionStartPos = peekOffset - 1;
259 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
260 fractionEndPos = peekOffset;
261 }
262 if ((m_input->peek(peekOffset) == 'E' || m_input->peek(peekOffset) == 'e')) {
263 int peekOffsetBeforeExponent = peekOffset;
264 ++peekOffset;
265 if (m_input->peek(peekOffset) == '+') {
266 ++peekOffset;
267 } else if (m_input->peek(peekOffset) =='-') {
268 exponentSign = -1;
269 ++peekOffset;
270 }
271 exponentStartPos = peekOffset;
272 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
273 exponentEndPos = peekOffset;
274 if (exponentEndPos == exponentStartPos)
275 peekOffset = peekOffsetBeforeExponent;
276 }
277 integerPart = m_input->getUInt(intStartPos, intEndPos);
278 fractionDigits = fractionEndPos - fractionStartPos;
279 unsigned floatingFractionEndPos = fractionEndPos;
280 fractionPart = m_input->getDouble(fractionStartPos, floatingFractionEndPos);
281 exponentPart = m_input->getUInt(exponentStartPos, exponentEndPos);
282 double exponent = pow(10, (float)exponentSign * (double)exponentPart);
283 value = (double)sign * ((double)integerPart + fractionPart) * exponent;
284
285 m_input->advance(peekOffset);
286 if (fractionDigits > 0)
287 type = NumberValueType;
288
289 return MediaQueryToken(NumberToken, value, type);
290 }
291
292 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token
293 MediaQueryToken MediaQueryTokenizer::consumeNumericToken()
294 {
295 MediaQueryToken token = consumeNumber();
296 if (nextCharsAreIdentifier())
297 token.convertToDimensionWithUnit(consumeName());
298 else if (consumeIfNext('%'))
299 token.convertToPercentage();
300 return token;
301 }
302
303 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token
304 MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken()
305 {
306 String name = consumeName();
307 if (consumeIfNext('('))
308 return MediaQueryToken(FunctionToken, name);
309 return MediaQueryToken(IdentToken, name);
310 }
311
312 void MediaQueryTokenizer::consumeUntilNonWhitespace()
313 {
314 // Using HTML space here rather than CSS space since we don't do preprocessi ng
315 while (isHTMLSpace<UChar>(m_input->currentInputChar()))
316 consume();
317 }
318
319 bool MediaQueryTokenizer::consumeIfNext(UChar character)
320 {
321 if (m_input->currentInputChar() == character) {
322 consume();
323 return true;
324 }
325 return false;
326 }
327
328 // http://www.w3.org/TR/css3-syntax/#consume-a-name
329 String MediaQueryTokenizer::consumeName()
330 {
331 // FIXME: Is this as efficient as it can be?
332 // The possibility of escape chars mandates a copy AFAICT.
333 Vector<UChar> result;
334 while (true) {
335 if (isNameChar(m_input->currentInputChar())) {
336 result.append(consume());
337 continue;
338 }
339 if (nextTwoCharsAreValidEscape()) {
340 // "consume()" fixes a spec bug.
341 // The first code point should be consumed before consuming the esca ped code point.
342 consume();
343 result.append(consumeEscape());
344 continue;
345 }
346 return String(result);
347 }
348 }
349
350 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point
351 UChar MediaQueryTokenizer::consumeEscape()
352 {
353 UChar cc = consume();
354 ASSERT(cc != '\n');
355 if (isASCIIHexDigit(cc)) {
356 unsigned consumedHexDigits = 1;
357 String hexChars;
358 do {
359 hexChars.append(cc);
360 cc = consume();
361 consumedHexDigits++;
362 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc));
363 bool ok = false;
364 UChar codePoint = hexChars.toUIntStrict(&ok, 16);
365 if (!ok)
366 return WTF::Unicode::replacementCharacter;
367 return codePoint;
368 }
369
370 // Replaces NULLs with replacement characters, since we do not perform prepr ocessing
371 if (cc == kEndOfFileMarker)
372 return WTF::Unicode::replacementCharacter;
373 return cc;
374 }
375
376 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()
377 {
378 if (m_input->leftChars() < 2)
379 return false;
380 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));
381 }
382
383 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number
384 bool MediaQueryTokenizer::nextCharsAreNumber()
385 {
386 UChar first = m_input->currentInputChar();
387 UChar second = m_input->peek(1);
388 if (isASCIIDigit(first))
389 return true;
390 if (first == '+' || first == '-')
391 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input ->peek(2))));
392 if (first =='.')
393 return (isASCIIDigit(second));
394 return false;
395 }
396
397 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier
398 bool MediaQueryTokenizer::nextCharsAreIdentifier()
399 {
400 UChar firstChar = m_input->currentInputChar();
eseidel 2014/03/13 17:58:45 Is m_input ever null? Can we make it a reference?
401 if (isNameStart(firstChar) || nextTwoCharsAreValidEscape())
402 return true;
403
404 if (firstChar == '-') {
405 if (isNameStart(m_input->peek(1)))
406 return true;
407 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));
408 }
409
410 return false;
411 }
412
413 } // namespace WebCore
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698