Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(64)

Side by Side Diff: Source/core/css/parser/MediaQueryTokenizer.cpp

Issue 171383002: A thread-safe Media Query Parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Another attempt to fix Android build issues Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2013 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "config.h"
32 #include "core/css/parser/MediaQueryTokenizer.h"
33
34 #include "core/css/parser/CSSInputStream.h"
35 #include "core/html/parser/HTMLParserIdioms.h"
36 #include "wtf/unicode/CharacterNames.h"
37 #include <cfloat>
38
39 namespace WebCore {
40
41 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point
42 static bool isNameStart(UChar c)
43 {
44 if (isASCIIAlpha(c))
45 return true;
46 if (c == '_')
47 return true;
48 return !isASCII(c);
49 }
50
51 // http://www.w3.org/TR/css-syntax-3/#name-code-point
52 static bool isNameChar(UChar c)
53 {
54 return isNameStart(c) || isASCIIDigit(c) || c == '-';
55 }
56
57 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e
58 static bool twoCharsAreValidEscape(UChar first, UChar second)
59 {
60 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ;
61 }
62
63 MediaQueryTokenizer::MediaQueryTokenizer()
64 {
65 }
66
67 void MediaQueryTokenizer::reconsume(UChar c)
68 {
69 m_input->pushBack(c);
70 }
71
72 UChar MediaQueryTokenizer::consume()
73 {
74 UChar current = m_input->currentInputChar();
75 m_input->advance();
76 return current;
77 }
78
79 void MediaQueryTokenizer::consume(unsigned offset)
80 {
81 m_input->advance(offset);
82 }
83
84 CSSToken MediaQueryTokenizer::whiteSpace(UChar cc)
85 {
86 // CSS Tokenization is currently lossy, but we could record
87 // the exact whitespace instead of discarding it here.
88 consumeUntilNotWhitespace();
89 return CSSToken(WhitespaceToken);
90 }
91
92 CSSToken MediaQueryTokenizer::leftParen(UChar cc)
93 {
94 return CSSToken(LeftParenToken);
95 }
96
97 CSSToken MediaQueryTokenizer::rightParen(UChar cc)
98 {
99 return CSSToken(RightParenToken);
100 }
101
102 CSSToken MediaQueryTokenizer::plusOrFullStop(UChar cc)
103 {
104 if (nextCharsAreNumber()) {
105 reconsume(cc);
106 return consumeNumericToken();
107 }
108 return CSSToken(DelimToken, cc);
109 }
110
111 CSSToken MediaQueryTokenizer::comma(UChar cc)
112 {
113 return CSSToken(CommaToken);
114 }
115
116 CSSToken MediaQueryTokenizer::hyphenMinus(UChar cc)
117 {
118 if (nextCharsAreNumber()) {
119 reconsume(cc);
120 return consumeNumericToken();
121 }
122 if (nextCharsAreIdentifier()) {
123 reconsume(cc);
124 return consumeIdentLikeToken();
125 }
126 return CSSToken(DelimToken, cc);
127 }
128
129 CSSToken MediaQueryTokenizer::solidus(UChar cc)
130 {
131 return CSSToken(DelimToken, cc);
132 }
133
134 CSSToken MediaQueryTokenizer::colon(UChar cc)
135 {
136 return CSSToken(ColonToken);
137 }
138
139 CSSToken MediaQueryTokenizer::semiColon(UChar cc)
140 {
141 return CSSToken(SemicolonToken);
142 }
143
144 CSSToken MediaQueryTokenizer::reverseSolidus(UChar cc)
145 {
146 if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) {
147 reconsume(cc);
148 return consumeIdentLikeToken();
149 }
150 return CSSToken(DelimToken, cc);
151 }
152
153 CSSToken MediaQueryTokenizer::asciiDigit(UChar cc)
154 {
155 reconsume(cc);
156 return consumeNumericToken();
157 }
158
159 CSSToken MediaQueryTokenizer::nameStart(UChar cc)
160 {
161 reconsume(cc);
162 return consumeIdentLikeToken();
163 }
164
165 CSSToken MediaQueryTokenizer::endOfFile(UChar cc)
166 {
167 return CSSToken(EOFToken);
168 }
169
170 void MediaQueryTokenizer::tokenize(String string, Vector<CSSToken>& outTokens)
171 {
172 MediaQueryTokenizer tokenizer;
173 // According to the spec, we should perform preprocessing here.
174 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing
175 //
176 // However, we can skip this step since:
177 // * We're using HTML spaces (which accept \r and \f as a valid white space)
178 // * Do not count white spaces
179 // * consumeEscape replaces NULLs for replacement characters
180
181 CSSInputStream input(string);
182 while (true) {
183 outTokens.append(tokenizer.nextToken(input));
184 if (outTokens.last().type() == EOFToken)
185 return;
186 }
187 }
188
189 CSSToken MediaQueryTokenizer::nextToken(CSSInputStream& input)
190 {
191 // Unlike the HTMLTokenizer, the CSS Syntax spec is written
192 // as a stateless, (fixed-size) look-ahead tokenizer.
193 // We could move to the stateful model and instead create
194 // states for all the "next 3 codepoints are X" cases.
195 // State-machine tokenizers are easier to write to handle
196 // incremental tokenization of partial sources.
197 // However, for now we follow the spec exactly.
198 m_input = &input;
199 UChar cc = consume();
200 CodePoint codePointFunc = 0;
201
202 if (isASCII(cc)) {
203 ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM);
204 codePointFunc = getCodePoints()->codePoints[cc];
205 } else {
206 codePointFunc = &MediaQueryTokenizer::nameStart;
207 }
208
209 if (codePointFunc)
210 return ((this)->*(codePointFunc))(cc);
211
212 return CSSToken(DelimToken, cc);
213 }
214
215 // This method merges the following spec sections for efficiency
216 // http://www.w3.org/TR/css3-syntax/#consume-a-number
217 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number
218 CSSToken MediaQueryTokenizer::consumeNumber()
219 {
220 ASSERT(nextCharsAreNumber());
221 NumericValueType type = IntegerValueType;
222 double value = 0;
223 int sign = 1;
224 unsigned peekOffset = 0;
225 int exponentSign = 1;
226 unsigned exponentStartPos = 0;
227 unsigned exponentEndPos = 0;
228 unsigned fractionStartPos = 0;
229 unsigned fractionEndPos = 0;
230 unsigned long long integerPart;
231 unsigned long long fractionPart;
232 unsigned fractionDigits;
233 unsigned long long exponentPart;
234 if (m_input->currentInputChar() == '+') {
235 ++peekOffset;
236 } else if (m_input->peek(peekOffset) == '-') {
237 sign = -1;
238 ++peekOffset;
239 }
240 unsigned intStartPos = peekOffset;
241 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
242 unsigned intEndPos = peekOffset;
243 if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(++peekOff set))) {
244 fractionStartPos = peekOffset;
245 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
246 fractionEndPos = peekOffset;
247 }
248 if ((m_input->peek(peekOffset) == 'E' || m_input->peek(peekOffset) == 'e')) {
249 int peekOffsetBeforeExponent = peekOffset;
250 ++peekOffset;
251 if (m_input->peek(peekOffset) == '+') {
252 ++peekOffset;
253 } else if (m_input->peek(peekOffset) =='-') {
254 exponentSign = -1;
255 ++peekOffset;
256 }
257 exponentStartPos = peekOffset;
258 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset);
259 exponentEndPos = peekOffset;
260 if (exponentEndPos == exponentStartPos)
261 peekOffset = peekOffsetBeforeExponent;
262 }
263 integerPart = m_input->getNumber(intStartPos, intEndPos);
264 fractionDigits = fractionEndPos - fractionStartPos;
265 unsigned floatingFractionEndPos = fractionEndPos;
266 if (fractionDigits > DBL_DIG) {
267 // Limit the number of fraction digits, to avoid double (and fractionPar t) from overflowing
268 fractionDigits = DBL_DIG;
269 floatingFractionEndPos = fractionStartPos + DBL_DIG;
270 }
271 fractionPart = m_input->getNumber(fractionStartPos, floatingFractionEndPos);
272 exponentPart = m_input->getNumber(exponentStartPos, exponentEndPos);
273 double fractionDivisor = pow((double)10.0, (double)(fractionDigits));
274 double exponent = pow(10, (float)exponentSign * (double)exponentPart);
275 value = (double)sign * ((double)integerPart + (double)fractionPart / fractio nDivisor) * exponent;
276
277 m_input->advance(peekOffset);
278 if (fractionDigits > 0)
279 type = NumberValueType;
280
281 return CSSToken(NumberToken, value, type);
282 }
283
284 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token
285 CSSToken MediaQueryTokenizer::consumeNumericToken()
286 {
287 CSSToken token = consumeNumber();
288 if (nextCharsAreIdentifier())
289 token.convertToDimensionWithUnit(consumeName());
290 else if (consumeIfNext('%'))
291 token.convertToPercentage();
292 return token;
293 }
294
295 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token
296 CSSToken MediaQueryTokenizer::consumeIdentLikeToken()
297 {
298 String name = consumeName();
299 if (consumeIfNext('('))
300 return CSSToken(FunctionToken, name);
301 return CSSToken(IdentToken, name);
302 }
303
304 void MediaQueryTokenizer::consumeUntilNotWhitespace()
305 {
306 // Using HTML space here rather than CSS space since we don't do preprocessi ng
307 while (isHTMLSpace<UChar>(m_input->currentInputChar()))
308 consume();
309 }
310
311 bool MediaQueryTokenizer::consumeIfNext(UChar character)
312 {
313 if (m_input->currentInputChar() == character) {
314 consume();
315 return true;
316 }
317 return false;
318 }
319
320 // http://www.w3.org/TR/css3-syntax/#consume-a-name
321 String MediaQueryTokenizer::consumeName()
322 {
323 // FIXME: Is this as efficient as it can be?
324 // The possibility of escape chars mandates a copy AFAICT.
325 Vector<UChar> result;
326 while (true) {
327 if (isNameChar(m_input->currentInputChar())) {
328 result.append(consume());
329 continue;
330 }
331 if (nextTwoCharsAreValidEscape()) {
332 // "consume()" fixes a spec bug.
333 // The first code point should be consumed before consuming the esca ped code point.
334 consume();
335 result.append(consumeEscape());
336 continue;
337 }
338 return String(result);
339 }
340 }
341
342 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point
343 UChar MediaQueryTokenizer::consumeEscape()
344 {
345 UChar cc = consume();
346 ASSERT(cc != '\n');
347 if (isASCIIHexDigit(cc)) {
348 unsigned consumedHexDigits = 1;
349 String hexChars;
350 do {
351 hexChars.append(cc);
352 cc = consume();
353 consumedHexDigits++;
354 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc));
355 bool ok = false;
356 UChar codePoint = hexChars.toUIntStrict(&ok, 16);
357 if (!ok)
358 return WTF::Unicode::replacementCharacter;
359 return codePoint;
360 }
361
362 // Replaces NULLs with replacement characters, since we do not perform prepr ocessing
363 if (cc == kEndOfFileMarker)
364 return WTF::Unicode::replacementCharacter;
365 return cc;
366 }
367
368 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()
369 {
370 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));
371 }
372
373 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number
374 bool MediaQueryTokenizer::nextCharsAreNumber()
375 {
376 UChar first = m_input->currentInputChar();
377 UChar second = m_input->peek(1);
378 if (isASCIIDigit(first))
379 return true;
380 if (first == '+' || first == '-')
381 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input ->peek(2))));
382 if (first =='.')
383 return (isASCIIDigit(second));
384 return false;
385 }
386
387 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier
388 bool MediaQueryTokenizer::nextCharsAreIdentifier()
389 {
390 UChar firstChar = m_input->currentInputChar();
391 if (isNameStart(firstChar) || nextTwoCharsAreValidEscape())
392 return true;
393
394 if (firstChar == '-') {
395 if (isNameStart(m_input->peek(1)))
396 return true;
397 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2));
398 }
399
400 return false;
401 }
402
403 } // namespace WebCore
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698