| OLD | NEW |
| (Empty) |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "config.h" | |
| 6 #include "core/css/parser/MediaQueryTokenizer.h" | |
| 7 | |
| 8 #include "core/css/parser/MediaQueryInputStream.h" | |
| 9 #include "core/html/parser/HTMLParserIdioms.h" | |
| 10 #include "wtf/unicode/CharacterNames.h" | |
| 11 | |
| 12 namespace WebCore { | |
| 13 | |
| 14 const unsigned codePointsNumber = SCHAR_MAX; | |
| 15 | |
| 16 class MediaQueryTokenizer::CodePoints { | |
| 17 public: | |
| 18 MediaQueryTokenizer::CodePoint codePoints[codePointsNumber]; | |
| 19 | |
| 20 // FIXME: Move the codePoint array to be a static one, generated by build sc
ripts | |
| 21 CodePoints() | |
| 22 { | |
| 23 memset(codePoints, 0, codePointsNumber); | |
| 24 codePoints['\n'] = &MediaQueryTokenizer::whiteSpace; | |
| 25 codePoints['\r'] = &MediaQueryTokenizer::whiteSpace; | |
| 26 codePoints['\t'] = &MediaQueryTokenizer::whiteSpace; | |
| 27 codePoints[' '] = &MediaQueryTokenizer::whiteSpace; | |
| 28 codePoints['\f'] = &MediaQueryTokenizer::whiteSpace; | |
| 29 codePoints['('] = &MediaQueryTokenizer::leftParenthesis; | |
| 30 codePoints[')'] = &MediaQueryTokenizer::rightParenthesis; | |
| 31 codePoints['+'] = &MediaQueryTokenizer::plusOrFullStop; | |
| 32 codePoints['.'] = &MediaQueryTokenizer::plusOrFullStop; | |
| 33 codePoints[','] = &MediaQueryTokenizer::comma; | |
| 34 codePoints['-'] = &MediaQueryTokenizer::hyphenMinus; | |
| 35 codePoints['/'] = &MediaQueryTokenizer::solidus; | |
| 36 codePoints[':'] = &MediaQueryTokenizer::colon; | |
| 37 codePoints[';'] = &MediaQueryTokenizer::semiColon; | |
| 38 codePoints['\\'] = &MediaQueryTokenizer::reverseSolidus; | |
| 39 for (unsigned char digit = '0'; digit <= '9'; ++digit) | |
| 40 codePoints[digit] = &MediaQueryTokenizer::asciiDigit; | |
| 41 for (unsigned char alpha = 'a'; alpha <= 'z'; ++alpha) | |
| 42 codePoints[alpha] = &MediaQueryTokenizer::nameStart; | |
| 43 for (unsigned char alpha = 'A'; alpha <= 'Z'; ++alpha) | |
| 44 codePoints[alpha] = &MediaQueryTokenizer::nameStart; | |
| 45 codePoints['_'] = &MediaQueryTokenizer::nameStart; | |
| 46 codePoints[kEndOfFileMarker] = &MediaQueryTokenizer::endOfFile; | |
| 47 } | |
| 48 }; | |
| 49 | |
| 50 MediaQueryTokenizer::CodePoints* MediaQueryTokenizer::codePoints() | |
| 51 { | |
| 52 static CodePoints codePoints; | |
| 53 return &codePoints; | |
| 54 } | |
| 55 | |
| 56 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point | |
| 57 static bool isNameStart(UChar c) | |
| 58 { | |
| 59 if (isASCIIAlpha(c)) | |
| 60 return true; | |
| 61 if (c == '_') | |
| 62 return true; | |
| 63 return !isASCII(c); | |
| 64 } | |
| 65 | |
| 66 // http://www.w3.org/TR/css-syntax-3/#name-code-point | |
| 67 static bool isNameChar(UChar c) | |
| 68 { | |
| 69 return isNameStart(c) || isASCIIDigit(c) || c == '-'; | |
| 70 } | |
| 71 | |
| 72 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap
e | |
| 73 static bool twoCharsAreValidEscape(UChar first, UChar second) | |
| 74 { | |
| 75 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker))
; | |
| 76 } | |
| 77 | |
| 78 MediaQueryTokenizer::MediaQueryTokenizer(MediaQueryInputStream& inputStream) | |
| 79 : m_input(inputStream) | |
| 80 { | |
| 81 } | |
| 82 | |
| 83 void MediaQueryTokenizer::reconsume(UChar c) | |
| 84 { | |
| 85 m_input.pushBack(c); | |
| 86 } | |
| 87 | |
| 88 UChar MediaQueryTokenizer::consume() | |
| 89 { | |
| 90 UChar current = m_input.currentInputChar(); | |
| 91 m_input.advance(); | |
| 92 return current; | |
| 93 } | |
| 94 | |
| 95 void MediaQueryTokenizer::consume(unsigned offset) | |
| 96 { | |
| 97 m_input.advance(offset); | |
| 98 } | |
| 99 | |
| 100 MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc) | |
| 101 { | |
| 102 // CSS Tokenization is currently lossy, but we could record | |
| 103 // the exact whitespace instead of discarding it here. | |
| 104 consumeUntilNonWhitespace(); | |
| 105 return MediaQueryToken(WhitespaceToken); | |
| 106 } | |
| 107 | |
| 108 MediaQueryToken MediaQueryTokenizer::leftParenthesis(UChar cc) | |
| 109 { | |
| 110 return MediaQueryToken(LeftParenthesisToken); | |
| 111 } | |
| 112 | |
| 113 MediaQueryToken MediaQueryTokenizer::rightParenthesis(UChar cc) | |
| 114 { | |
| 115 return MediaQueryToken(RightParenthesisToken); | |
| 116 } | |
| 117 | |
| 118 MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc) | |
| 119 { | |
| 120 if (nextCharsAreNumber()) { | |
| 121 reconsume(cc); | |
| 122 return consumeNumericToken(); | |
| 123 } | |
| 124 return MediaQueryToken(DelimiterToken, cc); | |
| 125 } | |
| 126 | |
| 127 MediaQueryToken MediaQueryTokenizer::comma(UChar cc) | |
| 128 { | |
| 129 return MediaQueryToken(CommaToken); | |
| 130 } | |
| 131 | |
| 132 MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc) | |
| 133 { | |
| 134 if (nextCharsAreNumber()) { | |
| 135 reconsume(cc); | |
| 136 return consumeNumericToken(); | |
| 137 } | |
| 138 if (nextCharsAreIdentifier()) { | |
| 139 reconsume(cc); | |
| 140 return consumeIdentLikeToken(); | |
| 141 } | |
| 142 return MediaQueryToken(DelimiterToken, cc); | |
| 143 } | |
| 144 | |
| 145 MediaQueryToken MediaQueryTokenizer::solidus(UChar cc) | |
| 146 { | |
| 147 return MediaQueryToken(DelimiterToken, cc); | |
| 148 } | |
| 149 | |
| 150 MediaQueryToken MediaQueryTokenizer::colon(UChar cc) | |
| 151 { | |
| 152 return MediaQueryToken(ColonToken); | |
| 153 } | |
| 154 | |
| 155 MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc) | |
| 156 { | |
| 157 return MediaQueryToken(SemicolonToken); | |
| 158 } | |
| 159 | |
| 160 MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc) | |
| 161 { | |
| 162 if (twoCharsAreValidEscape(cc, m_input.currentInputChar())) { | |
| 163 reconsume(cc); | |
| 164 return consumeIdentLikeToken(); | |
| 165 } | |
| 166 return MediaQueryToken(DelimiterToken, cc); | |
| 167 } | |
| 168 | |
| 169 MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc) | |
| 170 { | |
| 171 reconsume(cc); | |
| 172 return consumeNumericToken(); | |
| 173 } | |
| 174 | |
| 175 MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc) | |
| 176 { | |
| 177 reconsume(cc); | |
| 178 return consumeIdentLikeToken(); | |
| 179 } | |
| 180 | |
| 181 MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc) | |
| 182 { | |
| 183 return MediaQueryToken(EOFToken); | |
| 184 } | |
| 185 | |
| 186 void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTo
kens) | |
| 187 { | |
| 188 // According to the spec, we should perform preprocessing here. | |
| 189 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing | |
| 190 // | |
| 191 // However, we can skip this step since: | |
| 192 // * We're using HTML spaces (which accept \r and \f as a valid white space) | |
| 193 // * Do not count white spaces | |
| 194 // * consumeEscape replaces NULLs for replacement characters | |
| 195 | |
| 196 MediaQueryInputStream input(string); | |
| 197 MediaQueryTokenizer tokenizer(input); | |
| 198 while (true) { | |
| 199 outTokens.append(tokenizer.nextToken()); | |
| 200 if (outTokens.last().type() == EOFToken) | |
| 201 return; | |
| 202 } | |
| 203 } | |
| 204 | |
| 205 MediaQueryToken MediaQueryTokenizer::nextToken() | |
| 206 { | |
| 207 // Unlike the HTMLTokenizer, the CSS Syntax spec is written | |
| 208 // as a stateless, (fixed-size) look-ahead tokenizer. | |
| 209 // We could move to the stateful model and instead create | |
| 210 // states for all the "next 3 codepoints are X" cases. | |
| 211 // State-machine tokenizers are easier to write to handle | |
| 212 // incremental tokenization of partial sources. | |
| 213 // However, for now we follow the spec exactly. | |
| 214 UChar cc = consume(); | |
| 215 CodePoint codePointFunc = 0; | |
| 216 | |
| 217 if (isASCII(cc)) { | |
| 218 ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber); | |
| 219 codePointFunc = codePoints()->codePoints[cc]; | |
| 220 } else { | |
| 221 codePointFunc = &MediaQueryTokenizer::nameStart; | |
| 222 } | |
| 223 | |
| 224 if (codePointFunc) | |
| 225 return ((this)->*(codePointFunc))(cc); | |
| 226 | |
| 227 return MediaQueryToken(DelimiterToken, cc); | |
| 228 } | |
| 229 | |
| 230 static int getSign(MediaQueryInputStream& input, unsigned& offset) | |
| 231 { | |
| 232 int sign = 1; | |
| 233 if (input.currentInputChar() == '+') { | |
| 234 ++offset; | |
| 235 } else if (input.peek(offset) == '-') { | |
| 236 sign = -1; | |
| 237 ++offset; | |
| 238 } | |
| 239 return sign; | |
| 240 } | |
| 241 | |
| 242 static unsigned long long getInteger(MediaQueryInputStream& input, unsigned& off
set) | |
| 243 { | |
| 244 unsigned intStartPos = offset; | |
| 245 offset = input.skipWhilePredicate<isASCIIDigit>(offset); | |
| 246 unsigned intEndPos = offset; | |
| 247 return input.getUInt(intStartPos, intEndPos); | |
| 248 } | |
| 249 | |
| 250 static double getFraction(MediaQueryInputStream& input, unsigned& offset, unsign
ed& digitsNumber) | |
| 251 { | |
| 252 unsigned fractionStartPos = 0; | |
| 253 unsigned fractionEndPos = 0; | |
| 254 if (input.peek(offset) == '.' && isASCIIDigit(input.peek(++offset))) { | |
| 255 fractionStartPos = offset - 1; | |
| 256 offset = input.skipWhilePredicate<isASCIIDigit>(offset); | |
| 257 fractionEndPos = offset; | |
| 258 } | |
| 259 digitsNumber = fractionEndPos- fractionStartPos; | |
| 260 return input.getDouble(fractionStartPos, fractionEndPos); | |
| 261 } | |
| 262 | |
| 263 static unsigned long long getExponent(MediaQueryInputStream& input, unsigned& of
fset, int sign) | |
| 264 { | |
| 265 unsigned exponentStartPos = 0; | |
| 266 unsigned exponentEndPos = 0; | |
| 267 if ((input.peek(offset) == 'E' || input.peek(offset) == 'e')) { | |
| 268 int offsetBeforeExponent = offset; | |
| 269 ++offset; | |
| 270 if (input.peek(offset) == '+') { | |
| 271 ++offset; | |
| 272 } else if (input.peek(offset) =='-') { | |
| 273 sign = -1; | |
| 274 ++offset; | |
| 275 } | |
| 276 exponentStartPos = offset; | |
| 277 offset = input.skipWhilePredicate<isASCIIDigit>(offset); | |
| 278 exponentEndPos = offset; | |
| 279 if (exponentEndPos == exponentStartPos) | |
| 280 offset = offsetBeforeExponent; | |
| 281 } | |
| 282 return input.getUInt(exponentStartPos, exponentEndPos); | |
| 283 } | |
| 284 | |
| 285 // This method merges the following spec sections for efficiency | |
| 286 // http://www.w3.org/TR/css3-syntax/#consume-a-number | |
| 287 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number | |
| 288 MediaQueryToken MediaQueryTokenizer::consumeNumber() | |
| 289 { | |
| 290 ASSERT(nextCharsAreNumber()); | |
| 291 NumericValueType type = IntegerValueType; | |
| 292 double value = 0; | |
| 293 unsigned offset = 0; | |
| 294 int exponentSign = 1; | |
| 295 unsigned fractionDigits; | |
| 296 int sign = getSign(m_input, offset); | |
| 297 unsigned long long integerPart = getInteger(m_input, offset); | |
| 298 double fractionPart = getFraction(m_input, offset, fractionDigits); | |
| 299 unsigned long long exponentPart = getExponent(m_input, offset, exponentSign)
; | |
| 300 double exponent = pow(10, (float)exponentSign * (double)exponentPart); | |
| 301 value = (double)sign * ((double)integerPart + fractionPart) * exponent; | |
| 302 | |
| 303 m_input.advance(offset); | |
| 304 if (fractionDigits > 0) | |
| 305 type = NumberValueType; | |
| 306 | |
| 307 return MediaQueryToken(NumberToken, value, type); | |
| 308 } | |
| 309 | |
| 310 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token | |
| 311 MediaQueryToken MediaQueryTokenizer::consumeNumericToken() | |
| 312 { | |
| 313 MediaQueryToken token = consumeNumber(); | |
| 314 if (nextCharsAreIdentifier()) | |
| 315 token.convertToDimensionWithUnit(consumeName()); | |
| 316 else if (consumeIfNext('%')) | |
| 317 token.convertToPercentage(); | |
| 318 return token; | |
| 319 } | |
| 320 | |
| 321 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token | |
| 322 MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken() | |
| 323 { | |
| 324 String name = consumeName(); | |
| 325 if (consumeIfNext('(')) | |
| 326 return MediaQueryToken(FunctionToken, name); | |
| 327 return MediaQueryToken(IdentToken, name); | |
| 328 } | |
| 329 | |
| 330 void MediaQueryTokenizer::consumeUntilNonWhitespace() | |
| 331 { | |
| 332 // Using HTML space here rather than CSS space since we don't do preprocessi
ng | |
| 333 while (isHTMLSpace<UChar>(m_input.currentInputChar())) | |
| 334 consume(); | |
| 335 } | |
| 336 | |
| 337 bool MediaQueryTokenizer::consumeIfNext(UChar character) | |
| 338 { | |
| 339 if (m_input.currentInputChar() == character) { | |
| 340 consume(); | |
| 341 return true; | |
| 342 } | |
| 343 return false; | |
| 344 } | |
| 345 | |
| 346 // http://www.w3.org/TR/css3-syntax/#consume-a-name | |
| 347 String MediaQueryTokenizer::consumeName() | |
| 348 { | |
| 349 // FIXME: Is this as efficient as it can be? | |
| 350 // The possibility of escape chars mandates a copy AFAICT. | |
| 351 Vector<UChar> result; | |
| 352 while (true) { | |
| 353 if (isNameChar(m_input.currentInputChar())) { | |
| 354 result.append(consume()); | |
| 355 continue; | |
| 356 } | |
| 357 if (nextTwoCharsAreValidEscape()) { | |
| 358 // "consume()" fixes a spec bug. | |
| 359 // The first code point should be consumed before consuming the esca
ped code point. | |
| 360 consume(); | |
| 361 result.append(consumeEscape()); | |
| 362 continue; | |
| 363 } | |
| 364 return String(result); | |
| 365 } | |
| 366 } | |
| 367 | |
| 368 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point | |
| 369 UChar MediaQueryTokenizer::consumeEscape() | |
| 370 { | |
| 371 UChar cc = consume(); | |
| 372 ASSERT(cc != '\n'); | |
| 373 if (isASCIIHexDigit(cc)) { | |
| 374 unsigned consumedHexDigits = 1; | |
| 375 String hexChars; | |
| 376 do { | |
| 377 hexChars.append(cc); | |
| 378 cc = consume(); | |
| 379 consumedHexDigits++; | |
| 380 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc)); | |
| 381 bool ok = false; | |
| 382 UChar codePoint = hexChars.toUIntStrict(&ok, 16); | |
| 383 if (!ok) | |
| 384 return WTF::Unicode::replacementCharacter; | |
| 385 return codePoint; | |
| 386 } | |
| 387 | |
| 388 // Replaces NULLs with replacement characters, since we do not perform prepr
ocessing | |
| 389 if (cc == kEndOfFileMarker) | |
| 390 return WTF::Unicode::replacementCharacter; | |
| 391 return cc; | |
| 392 } | |
| 393 | |
| 394 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape() | |
| 395 { | |
| 396 if (m_input.leftChars() < 2) | |
| 397 return false; | |
| 398 return twoCharsAreValidEscape(m_input.peek(1), m_input.peek(2)); | |
| 399 } | |
| 400 | |
| 401 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number | |
| 402 bool MediaQueryTokenizer::nextCharsAreNumber() | |
| 403 { | |
| 404 UChar first = m_input.currentInputChar(); | |
| 405 UChar second = m_input.peek(1); | |
| 406 if (isASCIIDigit(first)) | |
| 407 return true; | |
| 408 if (first == '+' || first == '-') | |
| 409 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input
.peek(2)))); | |
| 410 if (first =='.') | |
| 411 return (isASCIIDigit(second)); | |
| 412 return false; | |
| 413 } | |
| 414 | |
| 415 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier | |
| 416 bool MediaQueryTokenizer::nextCharsAreIdentifier() | |
| 417 { | |
| 418 UChar firstChar = m_input.currentInputChar(); | |
| 419 if (isNameStart(firstChar) || nextTwoCharsAreValidEscape()) | |
| 420 return true; | |
| 421 | |
| 422 if (firstChar == '-') { | |
| 423 if (isNameStart(m_input.peek(1))) | |
| 424 return true; | |
| 425 return nextTwoCharsAreValidEscape(); | |
| 426 } | |
| 427 | |
| 428 return false; | |
| 429 } | |
| 430 | |
| 431 } // namespace WebCore | |
| OLD | NEW |