| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "core/css/parser/CSSTokenizer.h" | 5 #include "core/css/parser/CSSTokenizer.h" |
| 6 | 6 |
| 7 namespace blink { | 7 namespace blink { |
| 8 #include "core/CSSTokenizerCodepoints.cpp" | 8 #include "core/CSSTokenizerCodepoints.cpp" |
| 9 } | 9 } |
| 10 | 10 |
| 11 #include "core/css/parser/CSSParserIdioms.h" | 11 #include "core/css/parser/CSSParserIdioms.h" |
| 12 #include "core/css/parser/CSSParserObserverWrapper.h" | 12 #include "core/css/parser/CSSParserObserverWrapper.h" |
| 13 #include "core/css/parser/CSSParserTokenRange.h" | 13 #include "core/css/parser/CSSParserTokenRange.h" |
| 14 #include "core/css/parser/CSSTokenizerInputStream.h" | 14 #include "core/css/parser/CSSTokenizerInputStream.h" |
| 15 #include "core/html/parser/HTMLParserIdioms.h" | 15 #include "core/html/parser/HTMLParserIdioms.h" |
| 16 #include "wtf/text/CharacterNames.h" | 16 #include "wtf/text/CharacterNames.h" |
| 17 | 17 |
| 18 namespace blink { | 18 namespace blink { |
| 19 | 19 |
| 20 CSSTokenizer::Scope::Scope(const String& string) | 20 CSSTokenizer::Scope::Scope(const String& string) |
| 21 : m_string(string) | 21 : m_string(string) |
| 22 { | 22 { |
| 23 // According to the spec, we should perform preprocessing here. | 23 // According to the spec, we should perform preprocessing here. |
| 24 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing | 24 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing |
| 25 // | 25 // |
| 26 // However, we can skip this step since: | 26 // However, we can skip this step since: |
| 27 // * We're using HTML spaces (which accept \r and \f as a valid white space) | 27 // * We're using HTML spaces (which accept \r and \f as a valid white space) |
| 28 // * Do not count white spaces | 28 // * Do not count white spaces |
| 29 // * CSSTokenizerInputStream::peek replaces NULLs for replacement characters | 29 // * CSSTokenizerInputStream::nextInputChar() replaces NULLs for replacement
characters |
| 30 | 30 |
| 31 if (string.isEmpty()) | 31 if (string.isEmpty()) |
| 32 return; | 32 return; |
| 33 | 33 |
| 34 // To avoid resizing we err on the side of reserving too much space. | 34 // To avoid resizing we err on the side of reserving too much space. |
| 35 // Most strings we tokenize have about 3.5 to 5 characters per token. | 35 // Most strings we tokenize have about 3.5 to 5 characters per token. |
| 36 m_tokens.reserveInitialCapacity(string.length() / 3); | 36 m_tokens.reserveInitialCapacity(string.length() / 3); |
| 37 | 37 |
| 38 CSSTokenizerInputStream input(string); | 38 CSSTokenizerInputStream input(string); |
| 39 CSSTokenizer tokenizer(input, *this); | 39 CSSTokenizer tokenizer(input, *this); |
| (...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 184 { | 184 { |
| 185 ASSERT(cc == '*'); | 185 ASSERT(cc == '*'); |
| 186 if (consumeIfNext('=')) | 186 if (consumeIfNext('=')) |
| 187 return CSSParserToken(SubstringMatchToken); | 187 return CSSParserToken(SubstringMatchToken); |
| 188 return CSSParserToken(DelimiterToken, '*'); | 188 return CSSParserToken(DelimiterToken, '*'); |
| 189 } | 189 } |
| 190 | 190 |
| 191 CSSParserToken CSSTokenizer::lessThan(UChar cc) | 191 CSSParserToken CSSTokenizer::lessThan(UChar cc) |
| 192 { | 192 { |
| 193 ASSERT(cc == '<'); | 193 ASSERT(cc == '<'); |
| 194 if (m_input.peek(0) == '!' && m_input.peek(1) == '-' && m_input.peek(2) == '
-') { | 194 if (m_input.peekWithoutReplacement(0) == '!' |
| 195 && m_input.peekWithoutReplacement(1) == '-' |
| 196 && m_input.peekWithoutReplacement(2) == '-') { |
| 195 m_input.advance(3); | 197 m_input.advance(3); |
| 196 return CSSParserToken(CDOToken); | 198 return CSSParserToken(CDOToken); |
| 197 } | 199 } |
| 198 return CSSParserToken(DelimiterToken, '<'); | 200 return CSSParserToken(DelimiterToken, '<'); |
| 199 } | 201 } |
| 200 | 202 |
| 201 CSSParserToken CSSTokenizer::comma(UChar cc) | 203 CSSParserToken CSSTokenizer::comma(UChar cc) |
| 202 { | 204 { |
| 203 return CSSParserToken(CommaToken); | 205 return CSSParserToken(CommaToken); |
| 204 } | 206 } |
| 205 | 207 |
| 206 CSSParserToken CSSTokenizer::hyphenMinus(UChar cc) | 208 CSSParserToken CSSTokenizer::hyphenMinus(UChar cc) |
| 207 { | 209 { |
| 208 if (nextCharsAreNumber(cc)) { | 210 if (nextCharsAreNumber(cc)) { |
| 209 reconsume(cc); | 211 reconsume(cc); |
| 210 return consumeNumericToken(); | 212 return consumeNumericToken(); |
| 211 } | 213 } |
| 212 if (m_input.peek(0) == '-' && m_input.peek(1) == '>') { | 214 if (m_input.peekWithoutReplacement(0) == '-' |
| 215 && m_input.peekWithoutReplacement(1) == '>') { |
| 213 m_input.advance(2); | 216 m_input.advance(2); |
| 214 return CSSParserToken(CDCToken); | 217 return CSSParserToken(CDCToken); |
| 215 } | 218 } |
| 216 if (nextCharsAreIdentifier(cc)) { | 219 if (nextCharsAreIdentifier(cc)) { |
| 217 reconsume(cc); | 220 reconsume(cc); |
| 218 return consumeIdentLikeToken(); | 221 return consumeIdentLikeToken(); |
| 219 } | 222 } |
| 220 return CSSParserToken(DelimiterToken, cc); | 223 return CSSParserToken(DelimiterToken, cc); |
| 221 } | 224 } |
| 222 | 225 |
| (...skipping 14 matching lines...) Expand all Loading... |
| 237 } | 240 } |
| 238 | 241 |
| 239 CSSParserToken CSSTokenizer::semiColon(UChar cc) | 242 CSSParserToken CSSTokenizer::semiColon(UChar cc) |
| 240 { | 243 { |
| 241 return CSSParserToken(SemicolonToken); | 244 return CSSParserToken(SemicolonToken); |
| 242 } | 245 } |
| 243 | 246 |
| 244 CSSParserToken CSSTokenizer::hash(UChar cc) | 247 CSSParserToken CSSTokenizer::hash(UChar cc) |
| 245 { | 248 { |
| 246 UChar nextChar = m_input.nextInputChar(); | 249 UChar nextChar = m_input.nextInputChar(); |
| 247 if (isNameCodePoint(nextChar) || twoCharsAreValidEscape(nextChar, m_input.pe
ek(1))) { | 250 if (isNameCodePoint(nextChar) || twoCharsAreValidEscape(nextChar, m_input.pe
ekWithoutReplacement(1))) { |
| 248 HashTokenType type = nextCharsAreIdentifier() ? HashTokenId : HashTokenU
nrestricted; | 251 HashTokenType type = nextCharsAreIdentifier() ? HashTokenId : HashTokenU
nrestricted; |
| 249 return CSSParserToken(type, consumeName()); | 252 return CSSParserToken(type, consumeName()); |
| 250 } | 253 } |
| 251 | 254 |
| 252 return CSSParserToken(DelimiterToken, cc); | 255 return CSSParserToken(DelimiterToken, cc); |
| 253 } | 256 } |
| 254 | 257 |
| 255 CSSParserToken CSSTokenizer::circumflexAccent(UChar cc) | 258 CSSParserToken CSSTokenizer::circumflexAccent(UChar cc) |
| 256 { | 259 { |
| 257 ASSERT(cc == '^'); | 260 ASSERT(cc == '^'); |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 305 | 308 |
| 306 CSSParserToken CSSTokenizer::asciiDigit(UChar cc) | 309 CSSParserToken CSSTokenizer::asciiDigit(UChar cc) |
| 307 { | 310 { |
| 308 reconsume(cc); | 311 reconsume(cc); |
| 309 return consumeNumericToken(); | 312 return consumeNumericToken(); |
| 310 } | 313 } |
| 311 | 314 |
| 312 CSSParserToken CSSTokenizer::letterU(UChar cc) | 315 CSSParserToken CSSTokenizer::letterU(UChar cc) |
| 313 { | 316 { |
| 314 if (m_input.nextInputChar() == '+' | 317 if (m_input.nextInputChar() == '+' |
| 315 && (isASCIIHexDigit(m_input.peek(1)) || m_input.peek(1) == '?')) { | 318 && (isASCIIHexDigit(m_input.peekWithoutReplacement(1)) |
| 319 || m_input.peekWithoutReplacement(1) == '?')) { |
| 316 m_input.advance(); | 320 m_input.advance(); |
| 317 return consumeUnicodeRange(); | 321 return consumeUnicodeRange(); |
| 318 } | 322 } |
| 319 reconsume(cc); | 323 reconsume(cc); |
| 320 return consumeIdentLikeToken(); | 324 return consumeIdentLikeToken(); |
| 321 } | 325 } |
| 322 | 326 |
| 323 CSSParserToken CSSTokenizer::nameStart(UChar cc) | 327 CSSParserToken CSSTokenizer::nameStart(UChar cc) |
| 324 { | 328 { |
| 325 reconsume(cc); | 329 reconsume(cc); |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 376 static double getInteger(CSSTokenizerInputStream& input, unsigned& offset) | 380 static double getInteger(CSSTokenizerInputStream& input, unsigned& offset) |
| 377 { | 381 { |
| 378 unsigned intStartPos = offset; | 382 unsigned intStartPos = offset; |
| 379 offset = input.skipWhilePredicate<isASCIIDigit>(offset); | 383 offset = input.skipWhilePredicate<isASCIIDigit>(offset); |
| 380 unsigned intEndPos = offset; | 384 unsigned intEndPos = offset; |
| 381 return input.getDouble(intStartPos, intEndPos); | 385 return input.getDouble(intStartPos, intEndPos); |
| 382 } | 386 } |
| 383 | 387 |
| 384 static double getFraction(CSSTokenizerInputStream& input, unsigned& offset) | 388 static double getFraction(CSSTokenizerInputStream& input, unsigned& offset) |
| 385 { | 389 { |
| 386 if (input.peek(offset) != '.' || !isASCIIDigit(input.peek(offset + 1))) | 390 if (input.peekWithoutReplacement(offset) != '.' |
| 391 || !isASCIIDigit(input.peekWithoutReplacement(offset + 1))) |
| 387 return 0; | 392 return 0; |
| 388 unsigned startOffset = offset; | 393 unsigned startOffset = offset; |
| 389 offset = input.skipWhilePredicate<isASCIIDigit>(offset + 1); | 394 offset = input.skipWhilePredicate<isASCIIDigit>(offset + 1); |
| 390 return input.getDouble(startOffset, offset); | 395 return input.getDouble(startOffset, offset); |
| 391 } | 396 } |
| 392 | 397 |
| 393 static double getExponent(CSSTokenizerInputStream& input, unsigned& offset, int&
sign) | 398 static double getExponent(CSSTokenizerInputStream& input, unsigned& offset, int&
sign) |
| 394 { | 399 { |
| 395 unsigned exponentStartPos = 0; | 400 unsigned exponentStartPos = 0; |
| 396 unsigned exponentEndPos = 0; | 401 unsigned exponentEndPos = 0; |
| 397 if ((input.peek(offset) == 'E' || input.peek(offset) == 'e')) { | 402 UChar next = input.peekWithoutReplacement(offset); |
| 398 int offsetBeforeExponent = offset; | 403 if (next != 'E' && next != 'e') |
| 404 return 0; |
| 405 int offsetBeforeExponent = offset; |
| 406 ++offset; |
| 407 next = input.peekWithoutReplacement(offset); |
| 408 if (next == '+') { |
| 399 ++offset; | 409 ++offset; |
| 400 if (input.peek(offset) == '+') { | 410 } else if (next =='-') { |
| 401 ++offset; | 411 sign = -1; |
| 402 } else if (input.peek(offset) =='-') { | 412 ++offset; |
| 403 sign = -1; | |
| 404 ++offset; | |
| 405 } | |
| 406 exponentStartPos = offset; | |
| 407 offset = input.skipWhilePredicate<isASCIIDigit>(offset); | |
| 408 exponentEndPos = offset; | |
| 409 if (exponentEndPos == exponentStartPos) | |
| 410 offset = offsetBeforeExponent; | |
| 411 } | 413 } |
| 414 exponentStartPos = offset; |
| 415 offset = input.skipWhilePredicate<isASCIIDigit>(offset); |
| 416 exponentEndPos = offset; |
| 417 if (exponentEndPos == exponentStartPos) |
| 418 offset = offsetBeforeExponent; |
| 412 return input.getDouble(exponentStartPos, exponentEndPos); | 419 return input.getDouble(exponentStartPos, exponentEndPos); |
| 413 } | 420 } |
| 414 | 421 |
| 415 // This method merges the following spec sections for efficiency | 422 // This method merges the following spec sections for efficiency |
| 416 // http://www.w3.org/TR/css3-syntax/#consume-a-number | 423 // http://www.w3.org/TR/css3-syntax/#consume-a-number |
| 417 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number | 424 // http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number |
| 418 CSSParserToken CSSTokenizer::consumeNumber() | 425 CSSParserToken CSSTokenizer::consumeNumber() |
| 419 { | 426 { |
| 420 ASSERT(nextCharsAreNumber()); | 427 ASSERT(nextCharsAreNumber()); |
| 421 NumericValueType type = IntegerValueType; | 428 NumericValueType type = IntegerValueType; |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 521 --lengthRemaining; | 528 --lengthRemaining; |
| 522 } | 529 } |
| 523 | 530 |
| 524 UChar32 end = start; | 531 UChar32 end = start; |
| 525 if (lengthRemaining && consumeIfNext('?')) { | 532 if (lengthRemaining && consumeIfNext('?')) { |
| 526 do { | 533 do { |
| 527 start *= 16; | 534 start *= 16; |
| 528 end = end * 16 + 0xF; | 535 end = end * 16 + 0xF; |
| 529 --lengthRemaining; | 536 --lengthRemaining; |
| 530 } while (lengthRemaining && consumeIfNext('?')); | 537 } while (lengthRemaining && consumeIfNext('?')); |
| 531 } else if (m_input.nextInputChar() == '-' && isASCIIHexDigit(m_input.peek(1)
)) { | 538 } else if (m_input.nextInputChar() == '-' && isASCIIHexDigit(m_input.peekWit
houtReplacement(1))) { |
| 532 m_input.advance(); | 539 m_input.advance(); |
| 533 lengthRemaining = 6; | 540 lengthRemaining = 6; |
| 534 end = 0; | 541 end = 0; |
| 535 do { | 542 do { |
| 536 end = end * 16 + toASCIIHexValue(consume()); | 543 end = end * 16 + toASCIIHexValue(consume()); |
| 537 --lengthRemaining; | 544 --lengthRemaining; |
| 538 } while (lengthRemaining && isASCIIHexDigit(m_input.nextInputChar())); | 545 } while (lengthRemaining && isASCIIHexDigit(m_input.nextInputChar())); |
| 539 } | 546 } |
| 540 | 547 |
| 541 return CSSParserToken(UnicodeRangeToken, start, end); | 548 return CSSParserToken(UnicodeRangeToken, start, end); |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 604 return; | 611 return; |
| 605 if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) | 612 if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) |
| 606 consumeEscape(); | 613 consumeEscape(); |
| 607 } | 614 } |
| 608 } | 615 } |
| 609 | 616 |
| 610 void CSSTokenizer::consumeSingleWhitespaceIfNext() | 617 void CSSTokenizer::consumeSingleWhitespaceIfNext() |
| 611 { | 618 { |
| 612 // We check for \r\n and HTML spaces since we don't do preprocessing | 619 // We check for \r\n and HTML spaces since we don't do preprocessing |
| 613 UChar c = m_input.nextInputChar(); | 620 UChar c = m_input.nextInputChar(); |
| 614 if (c == '\r' && m_input.peek(1) == '\n') | 621 if (c == '\r' && m_input.peekWithoutReplacement(1) == '\n') |
| 615 m_input.advance(2); | 622 m_input.advance(2); |
| 616 else if (isHTMLSpace(c)) | 623 else if (isHTMLSpace(c)) |
| 617 m_input.advance(); | 624 m_input.advance(); |
| 618 } | 625 } |
| 619 | 626 |
| 620 void CSSTokenizer::consumeUntilCommentEndFound() | 627 void CSSTokenizer::consumeUntilCommentEndFound() |
| 621 { | 628 { |
| 622 UChar c = consume(); | 629 UChar c = consume(); |
| 623 while (true) { | 630 while (true) { |
| 624 if (c == kEndOfFileMarker) | 631 if (c == kEndOfFileMarker) |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 701 return codePoint; | 708 return codePoint; |
| 702 } | 709 } |
| 703 | 710 |
| 704 if (cc == kEndOfFileMarker) | 711 if (cc == kEndOfFileMarker) |
| 705 return replacementCharacter; | 712 return replacementCharacter; |
| 706 return cc; | 713 return cc; |
| 707 } | 714 } |
| 708 | 715 |
| 709 bool CSSTokenizer::nextTwoCharsAreValidEscape() | 716 bool CSSTokenizer::nextTwoCharsAreValidEscape() |
| 710 { | 717 { |
| 711 return twoCharsAreValidEscape(m_input.nextInputChar(), m_input.peek(1)); | 718 return twoCharsAreValidEscape(m_input.nextInputChar(), m_input.peekWithoutRe
placement(1)); |
| 712 } | 719 } |
| 713 | 720 |
| 714 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number | 721 // http://www.w3.org/TR/css3-syntax/#starts-with-a-number |
| 715 bool CSSTokenizer::nextCharsAreNumber(UChar first) | 722 bool CSSTokenizer::nextCharsAreNumber(UChar first) |
| 716 { | 723 { |
| 717 UChar second = m_input.nextInputChar(); | 724 UChar second = m_input.nextInputChar(); |
| 718 if (isASCIIDigit(first)) | 725 if (isASCIIDigit(first)) |
| 719 return true; | 726 return true; |
| 720 if (first == '+' || first == '-') | 727 if (first == '+' || first == '-') |
| 721 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input
.peek(1)))); | 728 return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input
.peekWithoutReplacement(1)))); |
| 722 if (first =='.') | 729 if (first =='.') |
| 723 return (isASCIIDigit(second)); | 730 return (isASCIIDigit(second)); |
| 724 return false; | 731 return false; |
| 725 } | 732 } |
| 726 | 733 |
| 727 bool CSSTokenizer::nextCharsAreNumber() | 734 bool CSSTokenizer::nextCharsAreNumber() |
| 728 { | 735 { |
| 729 UChar first = consume(); | 736 UChar first = consume(); |
| 730 bool areNumber = nextCharsAreNumber(first); | 737 bool areNumber = nextCharsAreNumber(first); |
| 731 reconsume(first); | 738 reconsume(first); |
| (...skipping 21 matching lines...) Expand all Loading... |
| 753 return areIdentifier; | 760 return areIdentifier; |
| 754 } | 761 } |
| 755 | 762 |
| 756 StringView CSSTokenizer::registerString(const String& string) | 763 StringView CSSTokenizer::registerString(const String& string) |
| 757 { | 764 { |
| 758 m_scope.storeString(string); | 765 m_scope.storeString(string); |
| 759 return string; | 766 return string; |
| 760 } | 767 } |
| 761 | 768 |
| 762 } // namespace blink | 769 } // namespace blink |
| OLD | NEW |