Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(786)

Side by Side Diff: Source/core/css/parser/MediaQueryTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Removed comment Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "config.h"
6 #include "core/css/parser/MediaQueryTokenizer.h"
7
8 #include "core/css/parser/MediaQueryInputStream.h"
9 #include "core/html/parser/HTMLParserIdioms.h"
10 #include "wtf/unicode/CharacterNames.h"
11
12 namespace WebCore {
13
14 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point
15 static bool isNameStart(UChar c)
16 {
17 if (isASCIIAlpha(c))
18 return true;
19 if (c == '_')
20 return true;
21 return !isASCII(c);
22 }
23
24 // http://www.w3.org/TR/css-syntax-3/#name-code-point
25 static bool isNameChar(UChar c)
26 {
27 return isNameStart(c) || isASCIIDigit(c) || c == '-';
28 }
29
30 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e
31 static bool twoCharsAreValidEscape(UChar first, UChar second)
32 {
33 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ;
34 }
35
36 MediaQueryTokenizer::MediaQueryTokenizer()
37 {
38 }
39
40 void MediaQueryTokenizer::reconsume(UChar c)
41 {
42 m_input->pushBack(c);
43 }
44
45 UChar MediaQueryTokenizer::consume()
46 {
47 UChar current = m_input->currentInputChar();
48 m_input->advance();
49 return current;
50 }
51
52 void MediaQueryTokenizer::consume(unsigned offset)
53 {
54 m_input->advance(offset);
55 }
56
57 MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc)
58 {
59 // CSS Tokenization is currently lossy, but we could record
60 // the exact whitespace instead of discarding it here.
61 consumeUntilNotWhitespace();
62 return MediaQueryToken(WhitespaceToken);
63 }
64
65 MediaQueryToken MediaQueryTokenizer::leftParen(UChar cc)
66 {
67 return MediaQueryToken(LeftParenToken);
68 }
69
70 MediaQueryToken MediaQueryTokenizer::rightParen(UChar cc)
71 {
72 return MediaQueryToken(RightParenToken);
73 }
74
75 MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc)
76 {
77 if (nextCharsAreNumber()) {
78 reconsume(cc);
79 return consumeNumericToken();
80 }
81 return MediaQueryToken(DelimToken, cc);
kenneth.r.christiansen 2014/03/08 22:37:47 DelimiterToken why not write it out, it is quite s
82 }
83
84 MediaQueryToken MediaQueryTokenizer::comma(UChar cc)
85 {
86 return MediaQueryToken(CommaToken);
87 }
88
89 MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc)
90 {
91 if (nextCharsAreNumber()) {
92 reconsume(cc);
93 return consumeNumericToken();
94 }
95 if (nextCharsAreIdentifier()) {
96 reconsume(cc);
97 return consumeIdentLikeToken();
98 }
99 return MediaQueryToken(DelimToken, cc);
100 }
101
102 MediaQueryToken MediaQueryTokenizer::solidus(UChar cc)
103 {
104 return MediaQueryToken(DelimToken, cc);
105 }
106
107 MediaQueryToken MediaQueryTokenizer::colon(UChar cc)
108 {
109 return MediaQueryToken(ColonToken);
110 }
111
112 MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc)
113 {
114 return MediaQueryToken(SemicolonToken);
115 }
116
117 MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc)
118 {
119 if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) {
120 reconsume(cc);
121 return consumeIdentLikeToken();
122 }
123 return MediaQueryToken(DelimToken, cc);
124 }
125
126 MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc)
127 {
128 reconsume(cc);
129 return consumeNumericToken();
130 }
131
132 MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc)
133 {
134 reconsume(cc);
135 return consumeIdentLikeToken();
136 }
137
138 MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc)
139 {
140 return MediaQueryToken(EOFToken);
141 }
142
143 void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTo kens)
144 {
145 MediaQueryTokenizer tokenizer;
146 // According to the spec, we should perform preprocessing here.
147 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing
148 //
149 // However, we can skip this step since:
150 // * We're using HTML spaces (which accept \r and \f as a valid white space)
151 // * Do not count white spaces
152 // * consumeEscape replaces NULLs for replacement characters
153
154 MediaQueryInputStream input(string);
155 while (true) {
156 outTokens.append(tokenizer.nextToken(input));
157 if (outTokens.last().type() == EOFToken)
158 return;
159 }
160 }
161
162 MediaQueryToken MediaQueryTokenizer::nextToken(MediaQueryInputStream& input)
163 {
164 // Unlike the HTMLTokenizer, the CSS Syntax spec is written
165 // as a stateless, (fixed-size) look-ahead tokenizer.
166 // We could move to the stateful model and instead create
167 // states for all the "next 3 codepoints are X" cases.
168 // State-machine tokenizers are easier to write to handle
169 // incremental tokenization of partial sources.
170 // However, for now we follow the spec exactly.
171 m_input = &input;
172 UChar cc = consume();
173 CodePoint codePointFunc = 0;
174
175 if (isASCII(cc)) {
176 ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM);
177 codePointFunc = getCodePoints()->codePoints[cc];
178 } else {
179 codePointFunc = &MediaQueryTokenizer::nameStart;
180 }
181
182 if (codePointFunc)
183 return ((this)->*(codePointFunc))(cc);
184
185 return MediaQueryToken(DelimToken, cc);
186 }
187
188 // This method merges the following spec sections for efficiency
189 // http://www.w3.org/TR/css3-syntax/#consume-a-number
190 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number
191 MediaQueryToken MediaQueryTokenizer::consumeNumber()
192 {
193 ASSERT(nextCharsAreNumber());
194 NumericValueType type = IntegerValueType;
195 double value = 0;
196 int sign = 1;
197 unsigned peekOffset = 0;
198 int exponentSign = 1;
199 unsigned exponentStartPos = 0;
200 unsigned exponentEndPos = 0;
201 unsigned fractionStartPos = 0;
202 unsigned fractionEndPos = 0;
203 unsigned long long integerPart;
204 double fractionPart;
205 unsigned fractionDigits;
206 unsigned long long exponentPart;
207 if (m_input->currentInputChar() == '+') {
208 ++peekOffset;
209 } else if (m_input->peek(peekOffset) == '-') {
210 sign = -1;
211 ++peekOffset;
212 }
213 unsigned intStartPos = peekOffset;
214 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
215 unsigned intEndPos = peekOffset;
216 if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(++peekOff set))) {
217 fractionStartPos = peekOffset - 1;
218 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
219 fractionEndPos = peekOffset;
220 }
221 if ((m_input->peek(peekOffset) == 'E' || m_input->peek(peekOffset) == 'e')) {
222 int peekOffsetBeforeExponent = peekOffset;
223 ++peekOffset;
224 if (m_input->peek(peekOffset) == '+') {
225 ++peekOffset;
226 } else if (m_input->peek(peekOffset) =='-') {
227 exponentSign = -1;
228 ++peekOffset;
229 }
230 exponentStartPos = peekOffset;
231 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
232 exponentEndPos = peekOffset;
233 if (exponentEndPos == exponentStartPos)
234 peekOffset = peekOffsetBeforeExponent;
235 }
236 integerPart = m_input->getUInt(intStartPos, intEndPos);
237 fractionDigits = fractionEndPos - fractionStartPos;
238 unsigned floatingFractionEndPos = fractionEndPos;
239 fractionPart = m_input->getDouble(fractionStartPos, floatingFractionEndPos);
240 exponentPart = m_input->getUInt(exponentStartPos, exponentEndPos);
241 double exponent = pow(10, (float)exponentSign * (double)exponentPart);
242 value = (double)sign * ((double)integerPart + fractionPart) * exponent;
243
244 m_input->advance(peekOffset);
245 if (fractionDigits > 0)
246 type = NumberValueType;
247
248 return MediaQueryToken(NumberToken, value, type);
249 }
250
251 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token
252 MediaQueryToken MediaQueryTokenizer::consumeNumericToken()
253 {
254 MediaQueryToken token = consumeNumber();
255 if (nextCharsAreIdentifier())
256 token.convertToDimensionWithUnit(consumeName());
257 else if (consumeIfNext('%'))
258 token.convertToPercentage();
259 return token;
260 }
261
262 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token
263 MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken()
264 {
265 String name = consumeName();
266 if (consumeIfNext('('))
267 return MediaQueryToken(FunctionToken, name);
268 return MediaQueryToken(IdentToken, name);
269 }
270
271 void MediaQueryTokenizer::consumeUntilNotWhitespace()
272 {
273 // Using HTML space here rather than CSS space since we don't do preprocessi ng
274 while (isHTMLSpace<UChar>(m_input->currentInputChar()))
275 consume();
276 }
277
278 bool MediaQueryTokenizer::consumeIfNext(UChar character)
279 {
280 if (m_input->currentInputChar() == character) {
281 consume();
282 return true;
283 }
284 return false;
285 }
286
287 // http://www.w3.org/TR/css3-syntax/#consume-a-name
288 String MediaQueryTokenizer::consumeName()
289 {
290 // FIXME: Is this as efficient as it can be?
291 // The possibility of escape chars mandates a copy AFAICT.
292 Vector<UChar> result;
293 while (true) {
294 if (isNameChar(m_input->currentInputChar())) {
295 result.append(consume());
296 continue;
297 }
298 if (nextTwoCharsAreValidEscape()) {
299 // "consume()" fixes a spec bug.
300 // The first code point should be consumed before consuming the esca ped code point.
301 consume();
302 result.append(consumeEscape());
303 continue;
304 }
305 return String(result);
306 }
307 }
308
309 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point
310 UChar MediaQueryTokenizer::consumeEscape()
311 {
312 UChar cc = consume();
313 ASSERT(cc != '\n');
314 if (isASCIIHexDigit(cc)) {
315 unsigned consumedHexDigits = 1;
316 String hexChars;
317 do {
318 hexChars.append(cc);
319 cc = consume();
320 consumedHexDigits++;
321 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc));
322 bool ok = false;
323 UChar codePoint = hexChars.toUIntStrict(&ok, 16);
324 if (!ok)
325 return WTF::Unicode::replacementCharacter;
326 return codePoint;
327 }
328
329 // Replaces NULLs with replacement characters, since we do not perform prepr ocessing
330 if (cc == kEndOfFileMarker)
331 return WTF::Unicode::replacementCharacter;
332 return cc;
333 }
334
335 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()
336 {
337 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));
338 }
339
340 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number
341 bool MediaQueryTokenizer::nextCharsAreNumber()
342 {
343 UChar first = m_input->currentInputChar();
344 UChar second = m_input->peek(1);
345 if (isASCIIDigit(first))
346 return true;
347 if (first == '+' || first == '-')
348 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input ->peek(2))));
349 if (first =='.')
350 return (isASCIIDigit(second));
351 return false;
352 }
353
354 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier
355 bool MediaQueryTokenizer::nextCharsAreIdentifier()
356 {
357 UChar firstChar = m_input->currentInputChar();
358 if (isNameStart(firstChar) || nextTwoCharsAreValidEscape())
359 return true;
360
361 if (firstChar == '-') {
362 if (isNameStart(m_input->peek(1)))
363 return true;
364 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));
365 }
366
367 return false;
368 }
369
370 } // namespace WebCore
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698