Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(88)

Side by Side Diff: Source/core/css/parser/MediaQueryTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Rebased and fixed float parsing Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2013 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "config.h"
32 #include "core/css/parser/MediaQueryTokenizer.h"
33
34 #include "core/css/parser/CSSInputStream.h"
35 #include "core/html/parser/HTMLParserIdioms.h"
36 #include "wtf/unicode/CharacterNames.h"
37
38 namespace WebCore {
39
40 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point
41 static bool isNameStart(UChar c)
42 {
43 if (isASCIIAlpha(c))
44 return true;
45 if (c == '_')
46 return true;
47 return !isASCII(c);
48 }
49
50 // http://www.w3.org/TR/css-syntax-3/#name-code-point
51 static bool isNameChar(UChar c)
52 {
53 return isNameStart(c) || isASCIIDigit(c) || c == '-';
54 }
55
56 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e
57 static bool twoCharsAreValidEscape(UChar first, UChar second)
58 {
59 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ;
60 }
61
62 MediaQueryTokenizer::MediaQueryTokenizer()
63 {
64 }
65
66 void MediaQueryTokenizer::reconsume(UChar c)
67 {
68 m_input->pushBack(c);
69 }
70
71 UChar MediaQueryTokenizer::consume()
72 {
73 UChar current = m_input->currentInputChar();
74 m_input->advance();
75 return current;
76 }
77
78 void MediaQueryTokenizer::consume(unsigned offset)
79 {
80 m_input->advance(offset);
81 }
82
83 CSSToken MediaQueryTokenizer::whiteSpace(UChar cc)
84 {
85 // CSS Tokenization is currently lossy, but we could record
86 // the exact whitespace instead of discarding it here.
87 consumeUntilNotWhitespace();
88 return CSSToken(WhitespaceToken);
89 }
90
91 CSSToken MediaQueryTokenizer::leftParen(UChar cc)
92 {
93 return CSSToken(LeftParenToken);
94 }
95
96 CSSToken MediaQueryTokenizer::rightParen(UChar cc)
97 {
98 return CSSToken(RightParenToken);
99 }
100
101 CSSToken MediaQueryTokenizer::plusOrFullStop(UChar cc)
102 {
103 if (nextCharsAreNumber()) {
104 reconsume(cc);
105 return consumeNumericToken();
106 }
107 return CSSToken(DelimToken, cc);
108 }
109
110 CSSToken MediaQueryTokenizer::comma(UChar cc)
111 {
112 return CSSToken(CommaToken);
113 }
114
115 CSSToken MediaQueryTokenizer::hyphenMinus(UChar cc)
116 {
117 if (nextCharsAreNumber()) {
118 reconsume(cc);
119 return consumeNumericToken();
120 }
121 if (nextCharsAreIdentifier()) {
122 reconsume(cc);
123 return consumeIdentLikeToken();
124 }
125 return CSSToken(DelimToken, cc);
126 }
127
128 CSSToken MediaQueryTokenizer::solidus(UChar cc)
129 {
130 return CSSToken(DelimToken, cc);
131 }
132
133 CSSToken MediaQueryTokenizer::colon(UChar cc)
134 {
135 return CSSToken(ColonToken);
136 }
137
138 CSSToken MediaQueryTokenizer::semiColon(UChar cc)
139 {
140 return CSSToken(SemicolonToken);
141 }
142
143 CSSToken MediaQueryTokenizer::reverseSolidus(UChar cc)
144 {
145 if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) {
146 reconsume(cc);
147 return consumeIdentLikeToken();
148 }
149 return CSSToken(DelimToken, cc);
150 }
151
152 CSSToken MediaQueryTokenizer::asciiDigit(UChar cc)
153 {
154 reconsume(cc);
155 return consumeNumericToken();
156 }
157
158 CSSToken MediaQueryTokenizer::nameStart(UChar cc)
159 {
160 reconsume(cc);
161 return consumeIdentLikeToken();
162 }
163
164 CSSToken MediaQueryTokenizer::endOfFile(UChar cc)
165 {
166 return CSSToken(EOFToken);
167 }
168
169 void MediaQueryTokenizer::tokenize(String string, Vector<CSSToken>& outTokens)
170 {
171 MediaQueryTokenizer tokenizer;
172 // According to the spec, we should perform preprocessing here.
173 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing
174 //
175 // However, we can skip this step since:
176 // * We're using HTML spaces (which accept \r and \f as a valid white space)
177 // * Do not count white spaces
178 // * consumeEscape replaces NULLs for replacement characters
179
180 CSSInputStream input(string);
181 while (true) {
182 outTokens.append(tokenizer.nextToken(input));
183 if (outTokens.last().type() == EOFToken)
184 return;
185 }
186 }
187
188 CSSToken MediaQueryTokenizer::nextToken(CSSInputStream& input)
189 {
190 // Unlike the HTMLTokenizer, the CSS Syntax spec is written
191 // as a stateless, (fixed-size) look-ahead tokenizer.
192 // We could move to the stateful model and instead create
193 // states for all the "next 3 codepoints are X" cases.
194 // State-machine tokenizers are easier to write to handle
195 // incremental tokenization of partial sources.
196 // However, for now we follow the spec exactly.
197 m_input = &input;
198 UChar cc = consume();
199 CodePoint codePointFunc = 0;
200
201 if (isASCII(cc)) {
202 ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM);
203 codePointFunc = getCodePoints()->codePoints[cc];
204 } else {
205 codePointFunc = &MediaQueryTokenizer::nameStart;
206 }
207
208 if (codePointFunc)
209 return ((this)->*(codePointFunc))(cc);
210
211 return CSSToken(DelimToken, cc);
212 }
213
214 // This method merges the following spec sections for efficiency
215 // http://www.w3.org/TR/css3-syntax/#consume-a-number
216 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number
217 CSSToken MediaQueryTokenizer::consumeNumber()
218 {
219 ASSERT(nextCharsAreNumber());
220 NumericValueType type = IntegerValueType;
221 double value = 0;
222 int sign = 1;
223 unsigned peekOffset = 0;
224 int exponentSign = 1;
225 unsigned exponentStartPos = 0;
226 unsigned exponentEndPos = 0;
227 unsigned fractionStartPos = 0;
228 unsigned fractionEndPos = 0;
229 unsigned long long integerPart;
230 double fractionPart;
231 unsigned fractionDigits;
232 unsigned long long exponentPart;
233 if (m_input->currentInputChar() == '+') {
234 ++peekOffset;
235 } else if (m_input->peek(peekOffset) == '-') {
236 sign = -1;
237 ++peekOffset;
238 }
239 unsigned intStartPos = peekOffset;
240 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
241 unsigned intEndPos = peekOffset;
242 if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(++peekOff set))) {
243 fractionStartPos = peekOffset;
244 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
245 fractionEndPos = peekOffset;
246 }
247 if ((m_input->peek(peekOffset) == 'E' || m_input->peek(peekOffset) == 'e')) {
248 int peekOffsetBeforeExponent = peekOffset;
249 ++peekOffset;
250 if (m_input->peek(peekOffset) == '+') {
251 ++peekOffset;
252 } else if (m_input->peek(peekOffset) =='-') {
253 exponentSign = -1;
254 ++peekOffset;
255 }
256 exponentStartPos = peekOffset;
257 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
258 exponentEndPos = peekOffset;
259 if (exponentEndPos == exponentStartPos)
260 peekOffset = peekOffsetBeforeExponent;
261 }
262 integerPart = m_input->getUInt(intStartPos, intEndPos);
263 fractionDigits = fractionEndPos - fractionStartPos;
264 unsigned floatingFractionEndPos = fractionEndPos;
265 fractionPart = m_input->getDouble(--fractionStartPos, floatingFractionEndPos );
266 exponentPart = m_input->getUInt(exponentStartPos, exponentEndPos);
267 double exponent = pow(10, (float)exponentSign * (double)exponentPart);
268 value = (double)sign * ((double)integerPart + fractionPart) * exponent;
269
270 m_input->advance(peekOffset);
271 if (fractionDigits > 0)
272 type = NumberValueType;
273
274 return CSSToken(NumberToken, value, type);
275 }
276
277 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token
278 CSSToken MediaQueryTokenizer::consumeNumericToken()
279 {
280 CSSToken token = consumeNumber();
281 if (nextCharsAreIdentifier())
282 token.convertToDimensionWithUnit(consumeName());
283 else if (consumeIfNext('%'))
284 token.convertToPercentage();
285 return token;
286 }
287
288 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token
289 CSSToken MediaQueryTokenizer::consumeIdentLikeToken()
290 {
291 String name = consumeName();
292 if (consumeIfNext('('))
293 return CSSToken(FunctionToken, name);
294 return CSSToken(IdentToken, name);
295 }
296
297 void MediaQueryTokenizer::consumeUntilNotWhitespace()
298 {
299 // Using HTML space here rather than CSS space since we don't do preprocessi ng
300 while (isHTMLSpace<UChar>(m_input->currentInputChar()))
301 consume();
302 }
303
304 bool MediaQueryTokenizer::consumeIfNext(UChar character)
305 {
306 if (m_input->currentInputChar() == character) {
307 consume();
308 return true;
309 }
310 return false;
311 }
312
313 // http://www.w3.org/TR/css3-syntax/#consume-a-name
314 String MediaQueryTokenizer::consumeName()
315 {
316 // FIXME: Is this as efficient as it can be?
317 // The possibility of escape chars mandates a copy AFAICT.
318 Vector<UChar> result;
319 while (true) {
320 if (isNameChar(m_input->currentInputChar())) {
321 result.append(consume());
322 continue;
323 }
324 if (nextTwoCharsAreValidEscape()) {
325 // "consume()" fixes a spec bug.
326 // The first code point should be consumed before consuming the esca ped code point.
327 consume();
328 result.append(consumeEscape());
329 continue;
330 }
331 return String(result);
332 }
333 }
334
335 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point
336 UChar MediaQueryTokenizer::consumeEscape()
337 {
338 UChar cc = consume();
339 ASSERT(cc != '\n');
340 if (isASCIIHexDigit(cc)) {
341 unsigned consumedHexDigits = 1;
342 String hexChars;
343 do {
344 hexChars.append(cc);
345 cc = consume();
346 consumedHexDigits++;
347 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc));
348 bool ok = false;
349 UChar codePoint = hexChars.toUIntStrict(&ok, 16);
350 if (!ok)
351 return WTF::Unicode::replacementCharacter;
352 return codePoint;
353 }
354
355 // Replaces NULLs with replacement characters, since we do not perform prepr ocessing
356 if (cc == kEndOfFileMarker)
357 return WTF::Unicode::replacementCharacter;
358 return cc;
359 }
360
361 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()
362 {
363 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));
364 }
365
366 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number
367 bool MediaQueryTokenizer::nextCharsAreNumber()
368 {
369 UChar first = m_input->currentInputChar();
370 UChar second = m_input->peek(1);
371 if (isASCIIDigit(first))
372 return true;
373 if (first == '+' || first == '-')
374 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input ->peek(2))));
375 if (first =='.')
376 return (isASCIIDigit(second));
377 return false;
378 }
379
380 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier
381 bool MediaQueryTokenizer::nextCharsAreIdentifier()
382 {
383 UChar firstChar = m_input->currentInputChar();
384 if (isNameStart(firstChar) || nextTwoCharsAreValidEscape())
385 return true;
386
387 if (firstChar == '-') {
388 if (isNameStart(m_input->peek(1)))
389 return true;
390 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));
391 }
392
393 return false;
394 }
395
396 } // namespace WebCore
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698