OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2013 Google Inc. All rights reserved. |
| 3 * |
| 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are |
| 6 * met: |
| 7 * |
| 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above |
| 11 * copyright notice, this list of conditions and the following disclaimer |
| 12 * in the documentation and/or other materials provided with the |
| 13 * distribution. |
| 14 * * Neither the name of Google Inc. nor the names of its |
| 15 * contributors may be used to endorse or promote products derived from |
| 16 * this software without specific prior written permission. |
| 17 * |
| 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 */ |
| 30 |
| 31 #include "config.h" |
| 32 #include "core/css/parser/MediaQueryTokenizer.h" |
| 33 |
| 34 #include "core/css/parser/CSSInputStream.h" |
| 35 #include "core/html/parser/HTMLParserIdioms.h" |
| 36 #include "wtf/unicode/CharacterNames.h" |
| 37 #include <cfloat> |
| 38 |
| 39 namespace WebCore { |
| 40 |
| 41 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point |
| 42 static bool isNameStart(UChar c) |
| 43 { |
| 44 if (isASCIIAlpha(c)) |
| 45 return true; |
| 46 if (c == '_') |
| 47 return true; |
| 48 return !isASCII(c); |
| 49 } |
| 50 |
| 51 // http://www.w3.org/TR/css-syntax-3/#name-code-point |
| 52 static bool isNameChar(UChar c) |
| 53 { |
| 54 return isNameStart(c) || isASCIIDigit(c) || c == '-'; |
| 55 } |
| 56 |
| 57 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap
e |
| 58 static bool twoCharsAreValidEscape(UChar first, UChar second) |
| 59 { |
| 60 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker))
; |
| 61 } |
| 62 |
| 63 MediaQueryTokenizer::MediaQueryTokenizer() |
| 64 { |
| 65 } |
| 66 |
| 67 void MediaQueryTokenizer::reconsume(UChar c) |
| 68 { |
| 69 m_input->pushBack(c); |
| 70 } |
| 71 |
| 72 UChar MediaQueryTokenizer::consume() |
| 73 { |
| 74 UChar current = m_input->currentInputChar(); |
| 75 m_input->advance(); |
| 76 return current; |
| 77 } |
| 78 |
| 79 void MediaQueryTokenizer::consume(unsigned offset) |
| 80 { |
| 81 m_input->advance(offset); |
| 82 } |
| 83 |
| 84 CSSToken MediaQueryTokenizer::whiteSpace(UChar cc) |
| 85 { |
| 86 // CSS Tokenization is currently lossy, but we could record |
| 87 // the exact whitespace instead of discarding it here. |
| 88 consumeUntilNotWhitespace(); |
| 89 return CSSToken(WhitespaceToken); |
| 90 } |
| 91 |
| 92 CSSToken MediaQueryTokenizer::leftParen(UChar cc) |
| 93 { |
| 94 return CSSToken(LeftParenToken); |
| 95 } |
| 96 |
| 97 CSSToken MediaQueryTokenizer::rightParen(UChar cc) |
| 98 { |
| 99 return CSSToken(RightParenToken); |
| 100 } |
| 101 |
| 102 CSSToken MediaQueryTokenizer::plusOrFullStop(UChar cc) |
| 103 { |
| 104 if (nextCharsAreNumber()) { |
| 105 reconsume(cc); |
| 106 return consumeNumericToken(); |
| 107 } |
| 108 return CSSToken(DelimToken, cc); |
| 109 } |
| 110 |
| 111 CSSToken MediaQueryTokenizer::comma(UChar cc) |
| 112 { |
| 113 return CSSToken(CommaToken); |
| 114 } |
| 115 |
| 116 CSSToken MediaQueryTokenizer::hyphenMinus(UChar cc) |
| 117 { |
| 118 if (nextCharsAreNumber()) { |
| 119 reconsume(cc); |
| 120 return consumeNumericToken(); |
| 121 } |
| 122 if (nextCharsAreIdentifier()) { |
| 123 reconsume(cc); |
| 124 return consumeIdentLikeToken(); |
| 125 } |
| 126 return CSSToken(DelimToken, cc); |
| 127 } |
| 128 |
| 129 CSSToken MediaQueryTokenizer::solidus(UChar cc) |
| 130 { |
| 131 return CSSToken(DelimToken, cc); |
| 132 } |
| 133 |
| 134 CSSToken MediaQueryTokenizer::colon(UChar cc) |
| 135 { |
| 136 return CSSToken(ColonToken); |
| 137 } |
| 138 |
| 139 CSSToken MediaQueryTokenizer::semiColon(UChar cc) |
| 140 { |
| 141 return CSSToken(SemicolonToken); |
| 142 } |
| 143 |
| 144 CSSToken MediaQueryTokenizer::reverseSolidus(UChar cc) |
| 145 { |
| 146 if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) { |
| 147 reconsume(cc); |
| 148 return consumeIdentLikeToken(); |
| 149 } |
| 150 return CSSToken(DelimToken, cc); |
| 151 } |
| 152 |
| 153 CSSToken MediaQueryTokenizer::asciiDigit(UChar cc) |
| 154 { |
| 155 reconsume(cc); |
| 156 return consumeNumericToken(); |
| 157 } |
| 158 |
| 159 CSSToken MediaQueryTokenizer::nameStart(UChar cc) |
| 160 { |
| 161 reconsume(cc); |
| 162 return consumeIdentLikeToken(); |
| 163 } |
| 164 |
| 165 CSSToken MediaQueryTokenizer::endOfFile(UChar cc) |
| 166 { |
| 167 return CSSToken(EOFToken); |
| 168 } |
| 169 |
| 170 void MediaQueryTokenizer::tokenize(String string, Vector<CSSToken>& outTokens) |
| 171 { |
| 172 MediaQueryTokenizer tokenizer; |
| 173 // According to the spec, we should perform preprocessing here. |
| 174 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing |
| 175 // |
| 176 // However, we can skip this step since: |
| 177 // * We're using HTML spaces (which accept \r and \f as a valid white space) |
| 178 // * Do not count white spaces |
| 179 // * consumeEscape replaces NULLs for replacement characters |
| 180 |
| 181 CSSInputStream input(string); |
| 182 while (true) { |
| 183 outTokens.append(tokenizer.nextToken(input)); |
| 184 if (outTokens.last().type() == EOFToken) |
| 185 return; |
| 186 } |
| 187 } |
| 188 |
| 189 CSSToken MediaQueryTokenizer::nextToken(CSSInputStream& input) |
| 190 { |
| 191 // Unlike the HTMLTokenizer, the CSS Syntax spec is written |
| 192 // as a stateless, (fixed-size) look-ahead tokenizer. |
| 193 // We could move to the stateful model and instead create |
| 194 // states for all the "next 3 codepoints are X" cases. |
| 195 // State-machine tokenizers are easier to write to handle |
| 196 // incremental tokenization of partial sources. |
| 197 // However, for now we follow the spec exactly. |
| 198 m_input = &input; |
| 199 UChar cc = consume(); |
| 200 CodePoint codePointFunc = 0; |
| 201 |
| 202 if (isASCII(cc)) { |
| 203 ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM); |
| 204 codePointFunc = getCodePoints()->codePoints[cc]; |
| 205 } else { |
| 206 codePointFunc = &MediaQueryTokenizer::nameStart; |
| 207 } |
| 208 |
| 209 if (codePointFunc) |
| 210 return ((this)->*(codePointFunc))(cc); |
| 211 |
| 212 return CSSToken(DelimToken, cc); |
| 213 } |
| 214 |
| 215 // This method merges the following spec sections for efficiency |
| 216 // http://www.w3.org/TR/css3-syntax/#consume-a-number |
| 217 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number |
| 218 CSSToken MediaQueryTokenizer::consumeNumber() |
| 219 { |
| 220 ASSERT(nextCharsAreNumber()); |
| 221 NumericValueType type = IntegerValueType; |
| 222 double value = 0; |
| 223 int sign = 1; |
| 224 unsigned peekOffset = 0; |
| 225 int exponentSign = 1; |
| 226 unsigned exponentStartPos = 0; |
| 227 unsigned exponentEndPos = 0; |
| 228 unsigned fractionStartPos = 0; |
| 229 unsigned fractionEndPos = 0; |
| 230 unsigned long long integerPart; |
| 231 unsigned long long fractionPart; |
| 232 unsigned fractionDigits; |
| 233 unsigned long long exponentPart; |
| 234 if (m_input->currentInputChar() == '+') { |
| 235 ++peekOffset; |
| 236 } else if (m_input->peek(peekOffset) == '-') { |
| 237 sign = -1; |
| 238 ++peekOffset; |
| 239 } |
| 240 unsigned intStartPos = peekOffset; |
| 241 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); |
| 242 unsigned intEndPos = peekOffset; |
| 243 if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(++peekOff
set))) { |
| 244 fractionStartPos = peekOffset; |
| 245 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); |
| 246 fractionEndPos = peekOffset; |
| 247 } |
| 248 if ((m_input->peek(peekOffset) == 'E' || m_input->peek(peekOffset) == 'e'))
{ |
| 249 int peekOffsetBeforeExponent = peekOffset; |
| 250 ++peekOffset; |
| 251 if (m_input->peek(peekOffset) == '+') { |
| 252 ++peekOffset; |
| 253 } else if (m_input->peek(peekOffset) =='-') { |
| 254 exponentSign = -1; |
| 255 ++peekOffset; |
| 256 } |
| 257 exponentStartPos = peekOffset; |
| 258 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); |
| 259 exponentEndPos = peekOffset; |
| 260 if (exponentEndPos == exponentStartPos) |
| 261 peekOffset = peekOffsetBeforeExponent; |
| 262 } |
| 263 integerPart = m_input->getNumber(intStartPos, intEndPos); |
| 264 fractionDigits = fractionEndPos - fractionStartPos; |
| 265 unsigned floatingFractionEndPos = fractionEndPos; |
| 266 if (fractionDigits > DBL_DIG) { |
| 267 // Limit the number of fraction digits, to avoid double (and fractionPar
t) from overflowing |
| 268 fractionDigits = DBL_DIG; |
| 269 floatingFractionEndPos = fractionStartPos + DBL_DIG; |
| 270 } |
| 271 fractionPart = m_input->getNumber(fractionStartPos, floatingFractionEndPos); |
| 272 exponentPart = m_input->getNumber(exponentStartPos, exponentEndPos); |
| 273 double fractionDivisor = pow((double)10.0, (double)(fractionDigits)); |
| 274 double exponent = pow(10, (float)exponentSign * (double)exponentPart); |
| 275 value = (double)sign * ((double)integerPart + (double)fractionPart / fractio
nDivisor) * exponent; |
| 276 |
| 277 m_input->advance(peekOffset); |
| 278 if (fractionDigits > 0) |
| 279 type = NumberValueType; |
| 280 |
| 281 return CSSToken(NumberToken, value, type); |
| 282 } |
| 283 |
| 284 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token |
| 285 CSSToken MediaQueryTokenizer::consumeNumericToken() |
| 286 { |
| 287 CSSToken token = consumeNumber(); |
| 288 if (nextCharsAreIdentifier()) |
| 289 token.convertToDimensionWithUnit(consumeName()); |
| 290 else if (consumeIfNext('%')) |
| 291 token.convertToPercentage(); |
| 292 return token; |
| 293 } |
| 294 |
| 295 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token |
| 296 CSSToken MediaQueryTokenizer::consumeIdentLikeToken() |
| 297 { |
| 298 String name = consumeName(); |
| 299 if (consumeIfNext('(')) |
| 300 return CSSToken(FunctionToken, name); |
| 301 return CSSToken(IdentToken, name); |
| 302 } |
| 303 |
| 304 void MediaQueryTokenizer::consumeUntilNotWhitespace() |
| 305 { |
| 306 // Using HTML space here rather than CSS space since we don't do preprocessi
ng |
| 307 while (isHTMLSpace<UChar>(m_input->currentInputChar())) |
| 308 consume(); |
| 309 } |
| 310 |
| 311 bool MediaQueryTokenizer::consumeIfNext(UChar character) |
| 312 { |
| 313 if (m_input->currentInputChar() == character) { |
| 314 consume(); |
| 315 return true; |
| 316 } |
| 317 return false; |
| 318 } |
| 319 |
| 320 // http://www.w3.org/TR/css3-syntax/#consume-a-name |
| 321 String MediaQueryTokenizer::consumeName() |
| 322 { |
| 323 // FIXME: Is this as efficient as it can be? |
| 324 // The possibility of escape chars mandates a copy AFAICT. |
| 325 Vector<UChar> result; |
| 326 while (true) { |
| 327 if (isNameChar(m_input->currentInputChar())) { |
| 328 result.append(consume()); |
| 329 continue; |
| 330 } |
| 331 if (nextTwoCharsAreValidEscape()) { |
| 332 // "consume()" fixes a spec bug. |
| 333 // The first code point should be consumed before consuming the esca
ped code point. |
| 334 consume(); |
| 335 result.append(consumeEscape()); |
| 336 continue; |
| 337 } |
| 338 return String(result); |
| 339 } |
| 340 } |
| 341 |
| 342 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point |
| 343 UChar MediaQueryTokenizer::consumeEscape() |
| 344 { |
| 345 UChar cc = consume(); |
| 346 ASSERT(cc != '\n'); |
| 347 if (isASCIIHexDigit(cc)) { |
| 348 unsigned consumedHexDigits = 1; |
| 349 String hexChars; |
| 350 do { |
| 351 hexChars.append(cc); |
| 352 cc = consume(); |
| 353 consumedHexDigits++; |
| 354 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc)); |
| 355 bool ok = false; |
| 356 UChar codePoint = hexChars.toUIntStrict(&ok, 16); |
| 357 if (!ok) |
| 358 return WTF::Unicode::replacementCharacter; |
| 359 return codePoint; |
| 360 } |
| 361 |
| 362 // Replaces NULLs with replacement characters, since we do not perform prepr
ocessing |
| 363 if (cc == kEndOfFileMarker) |
| 364 return WTF::Unicode::replacementCharacter; |
| 365 return cc; |
| 366 } |
| 367 |
| 368 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape() |
| 369 { |
| 370 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2)); |
| 371 } |
| 372 |
| 373 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number |
| 374 bool MediaQueryTokenizer::nextCharsAreNumber() |
| 375 { |
| 376 UChar first = m_input->currentInputChar(); |
| 377 UChar second = m_input->peek(1); |
| 378 if (isASCIIDigit(first)) |
| 379 return true; |
| 380 if (first == '+' || first == '-') |
| 381 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input
->peek(2)))); |
| 382 if (first =='.') |
| 383 return (isASCIIDigit(second)); |
| 384 return false; |
| 385 } |
| 386 |
| 387 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier |
| 388 bool MediaQueryTokenizer::nextCharsAreIdentifier() |
| 389 { |
| 390 UChar firstChar = m_input->currentInputChar(); |
| 391 if (isNameStart(firstChar) || nextTwoCharsAreValidEscape()) |
| 392 return true; |
| 393 |
| 394 if (firstChar == '-') { |
| 395 if (isNameStart(m_input->peek(1))) |
| 396 return true; |
| 397 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2)); |
| 398 } |
| 399 |
| 400 return false; |
| 401 } |
| 402 |
| 403 } // namespace WebCore |
OLD | NEW |