OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "config.h" | |
6 #include "core/css/parser/MediaQueryTokenizer.h" | |
7 | |
8 #include "core/css/parser/MediaQueryInputStream.h" | |
9 #include "core/html/parser/HTMLParserIdioms.h" | |
10 #include "wtf/unicode/CharacterNames.h" | |
11 | |
12 namespace WebCore { | |
13 | |
14 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point | |
15 static bool isNameStart(UChar c) | |
16 { | |
17 if (isASCIIAlpha(c)) | |
18 return true; | |
19 if (c == '_') | |
20 return true; | |
21 return !isASCII(c); | |
22 } | |
23 | |
24 // http://www.w3.org/TR/css-syntax-3/#name-code-point | |
25 static bool isNameChar(UChar c) | |
26 { | |
27 return isNameStart(c) || isASCIIDigit(c) || c == '-'; | |
28 } | |
29 | |
30 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e | |
31 static bool twoCharsAreValidEscape(UChar first, UChar second) | |
32 { | |
33 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ; | |
34 } | |
35 | |
36 MediaQueryTokenizer::MediaQueryTokenizer() | |
37 { | |
38 } | |
39 | |
40 void MediaQueryTokenizer::reconsume(UChar c) | |
41 { | |
42 m_input->pushBack(c); | |
43 } | |
44 | |
45 UChar MediaQueryTokenizer::consume() | |
46 { | |
47 UChar current = m_input->currentInputChar(); | |
48 m_input->advance(); | |
49 return current; | |
50 } | |
51 | |
52 void MediaQueryTokenizer::consume(unsigned offset) | |
53 { | |
54 m_input->advance(offset); | |
55 } | |
56 | |
57 MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc) | |
58 { | |
59 // CSS Tokenization is currently lossy, but we could record | |
60 // the exact whitespace instead of discarding it here. | |
61 consumeUntilNotWhitespace(); | |
62 return MediaQueryToken(WhitespaceToken); | |
63 } | |
64 | |
65 MediaQueryToken MediaQueryTokenizer::leftParen(UChar cc) | |
66 { | |
67 return MediaQueryToken(LeftParenToken); | |
68 } | |
69 | |
70 MediaQueryToken MediaQueryTokenizer::rightParen(UChar cc) | |
71 { | |
72 return MediaQueryToken(RightParenToken); | |
73 } | |
74 | |
75 MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc) | |
76 { | |
77 if (nextCharsAreNumber()) { | |
78 reconsume(cc); | |
79 return consumeNumericToken(); | |
80 } | |
81 return MediaQueryToken(DelimToken, cc); | |
kenneth.r.christiansen
2014/03/08 22:37:47
DelimiterToken why not write it out, it is quite s
| |
82 } | |
83 | |
84 MediaQueryToken MediaQueryTokenizer::comma(UChar cc) | |
85 { | |
86 return MediaQueryToken(CommaToken); | |
87 } | |
88 | |
89 MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc) | |
90 { | |
91 if (nextCharsAreNumber()) { | |
92 reconsume(cc); | |
93 return consumeNumericToken(); | |
94 } | |
95 if (nextCharsAreIdentifier()) { | |
96 reconsume(cc); | |
97 return consumeIdentLikeToken(); | |
98 } | |
99 return MediaQueryToken(DelimToken, cc); | |
100 } | |
101 | |
102 MediaQueryToken MediaQueryTokenizer::solidus(UChar cc) | |
103 { | |
104 return MediaQueryToken(DelimToken, cc); | |
105 } | |
106 | |
107 MediaQueryToken MediaQueryTokenizer::colon(UChar cc) | |
108 { | |
109 return MediaQueryToken(ColonToken); | |
110 } | |
111 | |
112 MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc) | |
113 { | |
114 return MediaQueryToken(SemicolonToken); | |
115 } | |
116 | |
117 MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc) | |
118 { | |
119 if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) { | |
120 reconsume(cc); | |
121 return consumeIdentLikeToken(); | |
122 } | |
123 return MediaQueryToken(DelimToken, cc); | |
124 } | |
125 | |
126 MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc) | |
127 { | |
128 reconsume(cc); | |
129 return consumeNumericToken(); | |
130 } | |
131 | |
132 MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc) | |
133 { | |
134 reconsume(cc); | |
135 return consumeIdentLikeToken(); | |
136 } | |
137 | |
138 MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc) | |
139 { | |
140 return MediaQueryToken(EOFToken); | |
141 } | |
142 | |
143 void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTo kens) | |
144 { | |
145 MediaQueryTokenizer tokenizer; | |
146 // According to the spec, we should perform preprocessing here. | |
147 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing | |
148 // | |
149 // However, we can skip this step since: | |
150 // * We're using HTML spaces (which accept \r and \f as a valid white space) | |
151 // * Do not count white spaces | |
152 // * consumeEscape replaces NULLs for replacement characters | |
153 | |
154 MediaQueryInputStream input(string); | |
155 while (true) { | |
156 outTokens.append(tokenizer.nextToken(input)); | |
157 if (outTokens.last().type() == EOFToken) | |
158 return; | |
159 } | |
160 } | |
161 | |
162 MediaQueryToken MediaQueryTokenizer::nextToken(MediaQueryInputStream& input) | |
163 { | |
164 // Unlike the HTMLTokenizer, the CSS Syntax spec is written | |
165 // as a stateless, (fixed-size) look-ahead tokenizer. | |
166 // We could move to the stateful model and instead create | |
167 // states for all the "next 3 codepoints are X" cases. | |
168 // State-machine tokenizers are easier to write to handle | |
169 // incremental tokenization of partial sources. | |
170 // However, for now we follow the spec exactly. | |
171 m_input = &input; | |
172 UChar cc = consume(); | |
173 CodePoint codePointFunc = 0; | |
174 | |
175 if (isASCII(cc)) { | |
176 ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM); | |
177 codePointFunc = getCodePoints()->codePoints[cc]; | |
178 } else { | |
179 codePointFunc = &MediaQueryTokenizer::nameStart; | |
180 } | |
181 | |
182 if (codePointFunc) | |
183 return ((this)->*(codePointFunc))(cc); | |
184 | |
185 return MediaQueryToken(DelimToken, cc); | |
186 } | |
187 | |
188 // This method merges the following spec sections for efficiency | |
189 // http://www.w3.org/TR/css3-syntax/#consume-a-number | |
190 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number | |
191 MediaQueryToken MediaQueryTokenizer::consumeNumber() | |
192 { | |
193 ASSERT(nextCharsAreNumber()); | |
194 NumericValueType type = IntegerValueType; | |
195 double value = 0; | |
196 int sign = 1; | |
197 unsigned peekOffset = 0; | |
198 int exponentSign = 1; | |
199 unsigned exponentStartPos = 0; | |
200 unsigned exponentEndPos = 0; | |
201 unsigned fractionStartPos = 0; | |
202 unsigned fractionEndPos = 0; | |
203 unsigned long long integerPart; | |
204 double fractionPart; | |
205 unsigned fractionDigits; | |
206 unsigned long long exponentPart; | |
207 if (m_input->currentInputChar() == '+') { | |
208 ++peekOffset; | |
209 } else if (m_input->peek(peekOffset) == '-') { | |
210 sign = -1; | |
211 ++peekOffset; | |
212 } | |
213 unsigned intStartPos = peekOffset; | |
214 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); | |
215 unsigned intEndPos = peekOffset; | |
216 if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(++peekOff set))) { | |
217 fractionStartPos = peekOffset - 1; | |
218 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); | |
219 fractionEndPos = peekOffset; | |
220 } | |
221 if ((m_input->peek(peekOffset) == 'E' || m_input->peek(peekOffset) == 'e')) { | |
222 int peekOffsetBeforeExponent = peekOffset; | |
223 ++peekOffset; | |
224 if (m_input->peek(peekOffset) == '+') { | |
225 ++peekOffset; | |
226 } else if (m_input->peek(peekOffset) =='-') { | |
227 exponentSign = -1; | |
228 ++peekOffset; | |
229 } | |
230 exponentStartPos = peekOffset; | |
231 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); | |
232 exponentEndPos = peekOffset; | |
233 if (exponentEndPos == exponentStartPos) | |
234 peekOffset = peekOffsetBeforeExponent; | |
235 } | |
236 integerPart = m_input->getUInt(intStartPos, intEndPos); | |
237 fractionDigits = fractionEndPos - fractionStartPos; | |
238 unsigned floatingFractionEndPos = fractionEndPos; | |
239 fractionPart = m_input->getDouble(fractionStartPos, floatingFractionEndPos); | |
240 exponentPart = m_input->getUInt(exponentStartPos, exponentEndPos); | |
241 double exponent = pow(10, (float)exponentSign * (double)exponentPart); | |
242 value = (double)sign * ((double)integerPart + fractionPart) * exponent; | |
243 | |
244 m_input->advance(peekOffset); | |
245 if (fractionDigits > 0) | |
246 type = NumberValueType; | |
247 | |
248 return MediaQueryToken(NumberToken, value, type); | |
249 } | |
250 | |
251 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token | |
252 MediaQueryToken MediaQueryTokenizer::consumeNumericToken() | |
253 { | |
254 MediaQueryToken token = consumeNumber(); | |
255 if (nextCharsAreIdentifier()) | |
256 token.convertToDimensionWithUnit(consumeName()); | |
257 else if (consumeIfNext('%')) | |
258 token.convertToPercentage(); | |
259 return token; | |
260 } | |
261 | |
262 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token | |
263 MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken() | |
264 { | |
265 String name = consumeName(); | |
266 if (consumeIfNext('(')) | |
267 return MediaQueryToken(FunctionToken, name); | |
268 return MediaQueryToken(IdentToken, name); | |
269 } | |
270 | |
271 void MediaQueryTokenizer::consumeUntilNotWhitespace() | |
272 { | |
273 // Using HTML space here rather than CSS space since we don't do preprocessi ng | |
274 while (isHTMLSpace<UChar>(m_input->currentInputChar())) | |
275 consume(); | |
276 } | |
277 | |
278 bool MediaQueryTokenizer::consumeIfNext(UChar character) | |
279 { | |
280 if (m_input->currentInputChar() == character) { | |
281 consume(); | |
282 return true; | |
283 } | |
284 return false; | |
285 } | |
286 | |
287 // http://www.w3.org/TR/css3-syntax/#consume-a-name | |
288 String MediaQueryTokenizer::consumeName() | |
289 { | |
290 // FIXME: Is this as efficient as it can be? | |
291 // The possibility of escape chars mandates a copy AFAICT. | |
292 Vector<UChar> result; | |
293 while (true) { | |
294 if (isNameChar(m_input->currentInputChar())) { | |
295 result.append(consume()); | |
296 continue; | |
297 } | |
298 if (nextTwoCharsAreValidEscape()) { | |
299 // "consume()" fixes a spec bug. | |
300 // The first code point should be consumed before consuming the esca ped code point. | |
301 consume(); | |
302 result.append(consumeEscape()); | |
303 continue; | |
304 } | |
305 return String(result); | |
306 } | |
307 } | |
308 | |
309 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point | |
310 UChar MediaQueryTokenizer::consumeEscape() | |
311 { | |
312 UChar cc = consume(); | |
313 ASSERT(cc != '\n'); | |
314 if (isASCIIHexDigit(cc)) { | |
315 unsigned consumedHexDigits = 1; | |
316 String hexChars; | |
317 do { | |
318 hexChars.append(cc); | |
319 cc = consume(); | |
320 consumedHexDigits++; | |
321 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc)); | |
322 bool ok = false; | |
323 UChar codePoint = hexChars.toUIntStrict(&ok, 16); | |
324 if (!ok) | |
325 return WTF::Unicode::replacementCharacter; | |
326 return codePoint; | |
327 } | |
328 | |
329 // Replaces NULLs with replacement characters, since we do not perform prepr ocessing | |
330 if (cc == kEndOfFileMarker) | |
331 return WTF::Unicode::replacementCharacter; | |
332 return cc; | |
333 } | |
334 | |
335 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape() | |
336 { | |
337 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2)); | |
338 } | |
339 | |
340 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number | |
341 bool MediaQueryTokenizer::nextCharsAreNumber() | |
342 { | |
343 UChar first = m_input->currentInputChar(); | |
344 UChar second = m_input->peek(1); | |
345 if (isASCIIDigit(first)) | |
346 return true; | |
347 if (first == '+' || first == '-') | |
348 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input ->peek(2)))); | |
349 if (first =='.') | |
350 return (isASCIIDigit(second)); | |
351 return false; | |
352 } | |
353 | |
354 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier | |
355 bool MediaQueryTokenizer::nextCharsAreIdentifier() | |
356 { | |
357 UChar firstChar = m_input->currentInputChar(); | |
358 if (isNameStart(firstChar) || nextTwoCharsAreValidEscape()) | |
359 return true; | |
360 | |
361 if (firstChar == '-') { | |
362 if (isNameStart(m_input->peek(1))) | |
363 return true; | |
364 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2)); | |
365 } | |
366 | |
367 return false; | |
368 } | |
369 | |
370 } // namespace WebCore | |
OLD | NEW |