Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "config.h" | |
| 6 #include "core/css/parser/MediaQueryTokenizer.h" | |
| 7 | |
| 8 #include "core/css/parser/MediaQueryInputStream.h" | |
| 9 #include "core/html/parser/HTMLParserIdioms.h" | |
| 10 #include "wtf/unicode/CharacterNames.h" | |
| 11 | |
| 12 namespace WebCore { | |
| 13 | |
| 14 // http://dev.w3.org/csswg/css-syntax/#name-start-code-point | |
| 15 static bool isNameStart(UChar c) | |
| 16 { | |
| 17 if (isASCIIAlpha(c)) | |
| 18 return true; | |
| 19 if (c == '_') | |
| 20 return true; | |
| 21 return !isASCII(c); | |
| 22 } | |
| 23 | |
| 24 // http://www.w3.org/TR/css-syntax-3/#name-code-point | |
| 25 static bool isNameChar(UChar c) | |
| 26 { | |
| 27 return isNameStart(c) || isASCIIDigit(c) || c == '-'; | |
| 28 } | |
| 29 | |
| 30 // http://www.w3.org/TR/css-syntax-3/#check-if-two-code-points-are-a-valid-escap e | |
| 31 static bool twoCharsAreValidEscape(UChar first, UChar second) | |
| 32 { | |
| 33 return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker)) ; | |
| 34 } | |
| 35 | |
| 36 MediaQueryTokenizer::MediaQueryTokenizer() | |
| 37 { | |
| 38 } | |
| 39 | |
| 40 void MediaQueryTokenizer::reconsume(UChar c) | |
| 41 { | |
| 42 m_input->pushBack(c); | |
| 43 } | |
| 44 | |
| 45 UChar MediaQueryTokenizer::consume() | |
| 46 { | |
| 47 UChar current = m_input->currentInputChar(); | |
| 48 m_input->advance(); | |
| 49 return current; | |
| 50 } | |
| 51 | |
| 52 void MediaQueryTokenizer::consume(unsigned offset) | |
| 53 { | |
| 54 m_input->advance(offset); | |
| 55 } | |
| 56 | |
| 57 MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc) | |
| 58 { | |
| 59 // CSS Tokenization is currently lossy, but we could record | |
| 60 // the exact whitespace instead of discarding it here. | |
| 61 consumeUntilNotWhitespace(); | |
| 62 return MediaQueryToken(WhitespaceToken); | |
| 63 } | |
| 64 | |
| 65 MediaQueryToken MediaQueryTokenizer::leftParen(UChar cc) | |
| 66 { | |
| 67 return MediaQueryToken(LeftParenToken); | |
| 68 } | |
| 69 | |
| 70 MediaQueryToken MediaQueryTokenizer::rightParen(UChar cc) | |
| 71 { | |
| 72 return MediaQueryToken(RightParenToken); | |
| 73 } | |
| 74 | |
| 75 MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc) | |
| 76 { | |
| 77 if (nextCharsAreNumber()) { | |
| 78 reconsume(cc); | |
| 79 return consumeNumericToken(); | |
| 80 } | |
| 81 return MediaQueryToken(DelimToken, cc); | |
|
kenneth.r.christiansen
2014/03/08 22:37:47
DelimiterToken why not write it out, it is quite s
| |
| 82 } | |
| 83 | |
| 84 MediaQueryToken MediaQueryTokenizer::comma(UChar cc) | |
| 85 { | |
| 86 return MediaQueryToken(CommaToken); | |
| 87 } | |
| 88 | |
| 89 MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc) | |
| 90 { | |
| 91 if (nextCharsAreNumber()) { | |
| 92 reconsume(cc); | |
| 93 return consumeNumericToken(); | |
| 94 } | |
| 95 if (nextCharsAreIdentifier()) { | |
| 96 reconsume(cc); | |
| 97 return consumeIdentLikeToken(); | |
| 98 } | |
| 99 return MediaQueryToken(DelimToken, cc); | |
| 100 } | |
| 101 | |
| 102 MediaQueryToken MediaQueryTokenizer::solidus(UChar cc) | |
| 103 { | |
| 104 return MediaQueryToken(DelimToken, cc); | |
| 105 } | |
| 106 | |
| 107 MediaQueryToken MediaQueryTokenizer::colon(UChar cc) | |
| 108 { | |
| 109 return MediaQueryToken(ColonToken); | |
| 110 } | |
| 111 | |
| 112 MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc) | |
| 113 { | |
| 114 return MediaQueryToken(SemicolonToken); | |
| 115 } | |
| 116 | |
| 117 MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc) | |
| 118 { | |
| 119 if (twoCharsAreValidEscape(cc, m_input->currentInputChar())) { | |
| 120 reconsume(cc); | |
| 121 return consumeIdentLikeToken(); | |
| 122 } | |
| 123 return MediaQueryToken(DelimToken, cc); | |
| 124 } | |
| 125 | |
| 126 MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc) | |
| 127 { | |
| 128 reconsume(cc); | |
| 129 return consumeNumericToken(); | |
| 130 } | |
| 131 | |
| 132 MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc) | |
| 133 { | |
| 134 reconsume(cc); | |
| 135 return consumeIdentLikeToken(); | |
| 136 } | |
| 137 | |
| 138 MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc) | |
| 139 { | |
| 140 return MediaQueryToken(EOFToken); | |
| 141 } | |
| 142 | |
| 143 void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTo kens) | |
| 144 { | |
| 145 MediaQueryTokenizer tokenizer; | |
| 146 // According to the spec, we should perform preprocessing here. | |
| 147 // See: http://www.w3.org/TR/css-syntax-3/#input-preprocessing | |
| 148 // | |
| 149 // However, we can skip this step since: | |
| 150 // * We're using HTML spaces (which accept \r and \f as a valid white space) | |
| 151 // * Do not count white spaces | |
| 152 // * consumeEscape replaces NULLs for replacement characters | |
| 153 | |
| 154 MediaQueryInputStream input(string); | |
| 155 while (true) { | |
| 156 outTokens.append(tokenizer.nextToken(input)); | |
| 157 if (outTokens.last().type() == EOFToken) | |
| 158 return; | |
| 159 } | |
| 160 } | |
| 161 | |
| 162 MediaQueryToken MediaQueryTokenizer::nextToken(MediaQueryInputStream& input) | |
| 163 { | |
| 164 // Unlike the HTMLTokenizer, the CSS Syntax spec is written | |
| 165 // as a stateless, (fixed-size) look-ahead tokenizer. | |
| 166 // We could move to the stateful model and instead create | |
| 167 // states for all the "next 3 codepoints are X" cases. | |
| 168 // State-machine tokenizers are easier to write to handle | |
| 169 // incremental tokenization of partial sources. | |
| 170 // However, for now we follow the spec exactly. | |
| 171 m_input = &input; | |
| 172 UChar cc = consume(); | |
| 173 CodePoint codePointFunc = 0; | |
| 174 | |
| 175 if (isASCII(cc)) { | |
| 176 ASSERT_WITH_SECURITY_IMPLICATION(cc < CODE_POINTS_NUM); | |
| 177 codePointFunc = getCodePoints()->codePoints[cc]; | |
| 178 } else { | |
| 179 codePointFunc = &MediaQueryTokenizer::nameStart; | |
| 180 } | |
| 181 | |
| 182 if (codePointFunc) | |
| 183 return ((this)->*(codePointFunc))(cc); | |
| 184 | |
| 185 return MediaQueryToken(DelimToken, cc); | |
| 186 } | |
| 187 | |
| 188 // This method merges the following spec sections for efficiency | |
| 189 // http://www.w3.org/TR/css3-syntax/#consume-a-number | |
| 190 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number | |
| 191 MediaQueryToken MediaQueryTokenizer::consumeNumber() | |
| 192 { | |
| 193 ASSERT(nextCharsAreNumber()); | |
| 194 NumericValueType type = IntegerValueType; | |
| 195 double value = 0; | |
| 196 int sign = 1; | |
| 197 unsigned peekOffset = 0; | |
| 198 int exponentSign = 1; | |
| 199 unsigned exponentStartPos = 0; | |
| 200 unsigned exponentEndPos = 0; | |
| 201 unsigned fractionStartPos = 0; | |
| 202 unsigned fractionEndPos = 0; | |
| 203 unsigned long long integerPart; | |
| 204 double fractionPart; | |
| 205 unsigned fractionDigits; | |
| 206 unsigned long long exponentPart; | |
| 207 if (m_input->currentInputChar() == '+') { | |
| 208 ++peekOffset; | |
| 209 } else if (m_input->peek(peekOffset) == '-') { | |
| 210 sign = -1; | |
| 211 ++peekOffset; | |
| 212 } | |
| 213 unsigned intStartPos = peekOffset; | |
| 214 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); | |
| 215 unsigned intEndPos = peekOffset; | |
| 216 if (m_input->peek(peekOffset) == '.' && isASCIIDigit(m_input->peek(++peekOff set))) { | |
| 217 fractionStartPos = peekOffset - 1; | |
| 218 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); | |
| 219 fractionEndPos = peekOffset; | |
| 220 } | |
| 221 if ((m_input->peek(peekOffset) == 'E' || m_input->peek(peekOffset) == 'e')) { | |
| 222 int peekOffsetBeforeExponent = peekOffset; | |
| 223 ++peekOffset; | |
| 224 if (m_input->peek(peekOffset) == '+') { | |
| 225 ++peekOffset; | |
| 226 } else if (m_input->peek(peekOffset) =='-') { | |
| 227 exponentSign = -1; | |
| 228 ++peekOffset; | |
| 229 } | |
| 230 exponentStartPos = peekOffset; | |
| 231 peekOffset = m_input->skipWhilePredicate<isASCIIDigit>(peekOffset); | |
| 232 exponentEndPos = peekOffset; | |
| 233 if (exponentEndPos == exponentStartPos) | |
| 234 peekOffset = peekOffsetBeforeExponent; | |
| 235 } | |
| 236 integerPart = m_input->getUInt(intStartPos, intEndPos); | |
| 237 fractionDigits = fractionEndPos - fractionStartPos; | |
| 238 unsigned floatingFractionEndPos = fractionEndPos; | |
| 239 fractionPart = m_input->getDouble(fractionStartPos, floatingFractionEndPos); | |
| 240 exponentPart = m_input->getUInt(exponentStartPos, exponentEndPos); | |
| 241 double exponent = pow(10, (float)exponentSign * (double)exponentPart); | |
| 242 value = (double)sign * ((double)integerPart + fractionPart) * exponent; | |
| 243 | |
| 244 m_input->advance(peekOffset); | |
| 245 if (fractionDigits > 0) | |
| 246 type = NumberValueType; | |
| 247 | |
| 248 return MediaQueryToken(NumberToken, value, type); | |
| 249 } | |
| 250 | |
| 251 // http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token | |
| 252 MediaQueryToken MediaQueryTokenizer::consumeNumericToken() | |
| 253 { | |
| 254 MediaQueryToken token = consumeNumber(); | |
| 255 if (nextCharsAreIdentifier()) | |
| 256 token.convertToDimensionWithUnit(consumeName()); | |
| 257 else if (consumeIfNext('%')) | |
| 258 token.convertToPercentage(); | |
| 259 return token; | |
| 260 } | |
| 261 | |
| 262 // http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token | |
| 263 MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken() | |
| 264 { | |
| 265 String name = consumeName(); | |
| 266 if (consumeIfNext('(')) | |
| 267 return MediaQueryToken(FunctionToken, name); | |
| 268 return MediaQueryToken(IdentToken, name); | |
| 269 } | |
| 270 | |
| 271 void MediaQueryTokenizer::consumeUntilNotWhitespace() | |
| 272 { | |
| 273 // Using HTML space here rather than CSS space since we don't do preprocessi ng | |
| 274 while (isHTMLSpace<UChar>(m_input->currentInputChar())) | |
| 275 consume(); | |
| 276 } | |
| 277 | |
| 278 bool MediaQueryTokenizer::consumeIfNext(UChar character) | |
| 279 { | |
| 280 if (m_input->currentInputChar() == character) { | |
| 281 consume(); | |
| 282 return true; | |
| 283 } | |
| 284 return false; | |
| 285 } | |
| 286 | |
| 287 // http://www.w3.org/TR/css3-syntax/#consume-a-name | |
| 288 String MediaQueryTokenizer::consumeName() | |
| 289 { | |
| 290 // FIXME: Is this as efficient as it can be? | |
| 291 // The possibility of escape chars mandates a copy AFAICT. | |
| 292 Vector<UChar> result; | |
| 293 while (true) { | |
| 294 if (isNameChar(m_input->currentInputChar())) { | |
| 295 result.append(consume()); | |
| 296 continue; | |
| 297 } | |
| 298 if (nextTwoCharsAreValidEscape()) { | |
| 299 // "consume()" fixes a spec bug. | |
| 300 // The first code point should be consumed before consuming the esca ped code point. | |
| 301 consume(); | |
| 302 result.append(consumeEscape()); | |
| 303 continue; | |
| 304 } | |
| 305 return String(result); | |
| 306 } | |
| 307 } | |
| 308 | |
| 309 // http://www.w3.org/TR/css-syntax-3/#consume-an-escaped-code-point | |
| 310 UChar MediaQueryTokenizer::consumeEscape() | |
| 311 { | |
| 312 UChar cc = consume(); | |
| 313 ASSERT(cc != '\n'); | |
| 314 if (isASCIIHexDigit(cc)) { | |
| 315 unsigned consumedHexDigits = 1; | |
| 316 String hexChars; | |
| 317 do { | |
| 318 hexChars.append(cc); | |
| 319 cc = consume(); | |
| 320 consumedHexDigits++; | |
| 321 } while (consumedHexDigits < 6 && isASCIIHexDigit(cc)); | |
| 322 bool ok = false; | |
| 323 UChar codePoint = hexChars.toUIntStrict(&ok, 16); | |
| 324 if (!ok) | |
| 325 return WTF::Unicode::replacementCharacter; | |
| 326 return codePoint; | |
| 327 } | |
| 328 | |
| 329 // Replaces NULLs with replacement characters, since we do not perform prepr ocessing | |
| 330 if (cc == kEndOfFileMarker) | |
| 331 return WTF::Unicode::replacementCharacter; | |
| 332 return cc; | |
| 333 } | |
| 334 | |
| 335 bool MediaQueryTokenizer::nextTwoCharsAreValidEscape() | |
| 336 { | |
| 337 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2)); | |
| 338 } | |
| 339 | |
| 340 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number | |
| 341 bool MediaQueryTokenizer::nextCharsAreNumber() | |
| 342 { | |
| 343 UChar first = m_input->currentInputChar(); | |
| 344 UChar second = m_input->peek(1); | |
| 345 if (isASCIIDigit(first)) | |
| 346 return true; | |
| 347 if (first == '+' || first == '-') | |
| 348 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input ->peek(2)))); | |
| 349 if (first =='.') | |
| 350 return (isASCIIDigit(second)); | |
| 351 return false; | |
| 352 } | |
| 353 | |
| 354 // http://www.w3.org/TR/css3-syntax/#would-start-an-identifier | |
| 355 bool MediaQueryTokenizer::nextCharsAreIdentifier() | |
| 356 { | |
| 357 UChar firstChar = m_input->currentInputChar(); | |
| 358 if (isNameStart(firstChar) || nextTwoCharsAreValidEscape()) | |
| 359 return true; | |
| 360 | |
| 361 if (firstChar == '-') { | |
| 362 if (isNameStart(m_input->peek(1))) | |
| 363 return true; | |
| 364 return twoCharsAreValidEscape(m_input->peek(1), m_input->peek(2)); | |
| 365 } | |
| 366 | |
| 367 return false; | |
| 368 } | |
| 369 | |
| 370 } // namespace WebCore | |
| OLD | NEW |