Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "core/css/parser/CSSTokenizer.h" | 5 #include "core/css/parser/CSSTokenizer.h" |
| 6 | 6 |
| 7 namespace blink { | 7 namespace blink { |
| 8 #include "core/CSSTokenizerCodepoints.cpp" | 8 #include "core/CSSTokenizerCodepoints.cpp" |
| 9 } | 9 } |
| 10 | 10 |
| 11 #include "core/css/parser/CSSParserIdioms.h" | 11 #include "core/css/parser/CSSParserIdioms.h" |
| 12 #include "core/css/parser/CSSParserObserverWrapper.h" | 12 #include "core/css/parser/CSSParserObserverWrapper.h" |
| 13 #include "core/css/parser/CSSParserTokenRange.h" | 13 #include "core/css/parser/CSSParserTokenRange.h" |
| 14 #include "core/css/parser/CSSTokenizerInputStream.h" | 14 #include "core/css/parser/CSSTokenizerInputStream.h" |
| 15 #include "core/html/parser/HTMLParserIdioms.h" | 15 #include "core/html/parser/HTMLParserIdioms.h" |
| 16 #include "wtf/text/CharacterNames.h" | 16 #include "wtf/text/CharacterNames.h" |
| 17 | 17 |
| 18 namespace blink { | 18 namespace blink { |
| 19 | 19 |
| 20 CSSTokenizer::Scope::Scope(const String& string) | 20 CSSTokenizer::Scope::Scope(const String& string) |
| 21 : m_string(string) | 21 : m_string(string) |
| 22 { | 22 { |
| 23 // According to the spec, we should perform preprocessing here. | 23 // According to the spec, we should perform preprocessing here. |
| 24 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing | 24 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing |
| 25 // | 25 // |
| 26 // However, we can skip this step since: | 26 // However, we can skip this step since: |
| 27 // * We're using HTML spaces (which accept \r and \f as a valid white space) | 27 // * We're using HTML spaces (which accept \r and \f as a valid white space) |
| 28 // * Do not count white spaces | 28 // * Do not count white spaces |
| 29 // * CSSTokenizerInputStream::peek replaces NULLs for replacement characters | 29 // * CSSTokenizerInputStream::peek replaces NULLs for replacement characters |
| 30 | 30 |
| 31 if (string.isEmpty()) | 31 if (string.isEmpty()) |
| 32 return; | 32 return; |
| 33 | 33 |
| 34 // To avoid resizing we err on the side of reserving too much space. | 34 // To avoid resizing we err on the side of reserving too much space. |
| 35 // Most strings we tokenize have about 3.5 to 5 characters per token. | 35 // Most strings we tokenize have about 3.5 to 5 characters per token. |
| 36 m_tokens.reserveInitialCapacity(string.length() / 3); | 36 m_tokens.reserveInitialCapacity(string.length() / 3); |
| 37 | 37 |
| 38 CSSTokenizerInputStream input(string); | 38 CSSTokenizerInputStream input(string); |
| 39 CSSTokenizer tokenizer(input, *this); | 39 CSSTokenizer tokenizer(input, *this); |
| 40 while (true) { | 40 while (true) { |
| 41 CSSParserToken token = tokenizer.nextToken(); | 41 CSSParserToken token = tokenizer.nextToken(); |
| 42 if (token.type() == CommentToken) | 42 if (token.type() == CommentToken) |
| 43 continue; | 43 continue; |
| 44 if (token.type() == EOFToken) | 44 if (token.type() == EOFToken) |
| 45 return; | 45 return; |
| 46 m_tokens.append(token); | 46 m_tokens.append(token); |
| 47 } | 47 } |
| 48 } | 48 } |
| 49 | 49 |
| 50 CSSTokenizer::Scope::Scope(const String& string, CSSParserObserverWrapper& wrapper) | 50 CSSTokenizer::Scope::Scope(const String& string, CSSParserObserverWrapper& wrapper) |
| 51 : m_string(string) | 51 : m_string(string) |
| 52 { | 52 { |
| 53 if (string.isEmpty()) | 53 if (string.isEmpty()) |
| 54 return; | 54 return; |
| 55 | 55 |
| 56 CSSTokenizerInputStream input(string); | 56 CSSTokenizerInputStream input(string); |
| 57 CSSTokenizer tokenizer(input, *this); | 57 CSSTokenizer tokenizer(input, *this); |
| 58 | 58 |
| 59 unsigned offset = 0; | 59 unsigned offset = 0; |
| 60 while (true) { | 60 while (true) { |
| 61 CSSParserToken token = tokenizer.nextToken(); | 61 CSSParserToken token = tokenizer.nextToken(); |
| 62 if (token.type() == EOFToken) | 62 if (token.type() == EOFToken) |
| 63 break; | 63 break; |
| 64 if (token.type() == CommentToken) { | 64 if (token.type() == CommentToken) { |
| 65 wrapper.addComment(offset, input.offset(), m_tokens.size()); | 65 wrapper.addComment(offset, input.offset(), m_tokens.size()); |
| 66 } else { | 66 } else { |
| 67 m_tokens.append(token); | 67 m_tokens.append(token); |
| 68 wrapper.addToken(offset); | 68 wrapper.addToken(offset); |
| 69 } | 69 } |
| 70 offset = input.offset(); | 70 offset = input.offset(); |
| 71 } | 71 } |
| 72 | 72 |
| 73 wrapper.addToken(offset); | 73 wrapper.addToken(offset); |
| 74 wrapper.finalizeConstruction(m_tokens.begin()); | 74 wrapper.finalizeConstruction(m_tokens.begin()); |
| 75 } | 75 } |
| 76 | 76 |
| 77 CSSParserTokenRange CSSTokenizer::Scope::tokenRange() | 77 CSSParserTokenRange CSSTokenizer::Scope::tokenRange() |
| 78 { | 78 { |
| 79 return m_tokens; | 79 return CSSParserTokenRange(m_tokens.begin(), m_tokens.end()); |
| 80 } | 80 } |
| 81 | 81 |
| 82 unsigned CSSTokenizer::Scope::tokenCount() | 82 unsigned CSSTokenizer::Scope::tokenCount() |
| 83 { | 83 { |
| 84 return m_tokens.size(); | 84 return m_tokens.size(); |
| 85 } | 85 } |
| 86 | 86 |
| 87 static bool isNewLine(UChar cc) | 87 static bool isNewLine(UChar cc) |
| 88 { | 88 { |
| 89 // We check \r and \f here, since we have no preprocessing stage | 89 // We check \r and \f here, since we have no preprocessing stage |
| (...skipping 28 matching lines...) | |
| 118 { | 118 { |
| 119 m_input.advance(offset); | 119 m_input.advance(offset); |
| 120 } | 120 } |
| 121 | 121 |
| 122 CSSParserToken CSSTokenizer::whiteSpace(UChar cc) | 122 CSSParserToken CSSTokenizer::whiteSpace(UChar cc) |
| 123 { | 123 { |
| 124 consumeUntilNonWhitespace(); | 124 consumeUntilNonWhitespace(); |
| 125 return CSSParserToken(WhitespaceToken); | 125 return CSSParserToken(WhitespaceToken); |
| 126 } | 126 } |
| 127 | 127 |
| 128 static bool popIfBlockMatches(Vector<CSSParserTokenType>& blockStack, CSSParserTokenType type) | 128 static bool popIfBlockMatches(Vector<CSSParserTokenType, 8>& blockStack, CSSParserTokenType type) |
| 129 { | 129 { |
| 130 if (!blockStack.isEmpty() && blockStack.last() == type) { | 130 if (!blockStack.isEmpty() && blockStack.last() == type) { |
| 131 blockStack.removeLast(); | 131 blockStack.removeLast(); |
| 132 return true; | 132 return true; |
| 133 } | 133 } |
| 134 return false; | 134 return false; |
| 135 } | 135 } |
| 136 | 136 |
| 137 CSSParserToken CSSTokenizer::blockStart(CSSParserTokenType type) | 137 CSSParserToken CSSTokenizer::blockStart(CSSParserTokenType type) |
| 138 { | 138 { |
| (...skipping 212 matching lines...) | |
| 351 CSSParserToken CSSTokenizer::nextToken() | 351 CSSParserToken CSSTokenizer::nextToken() |
| 352 { | 352 { |
| 353 // Unlike the HTMLTokenizer, the CSS Syntax spec is written | 353 // Unlike the HTMLTokenizer, the CSS Syntax spec is written |
| 354 // as a stateless, (fixed-size) look-ahead tokenizer. | 354 // as a stateless, (fixed-size) look-ahead tokenizer. |
| 355 // We could move to the stateful model and instead create | 355 // We could move to the stateful model and instead create |
| 356 // states for all the "next 3 codepoints are X" cases. | 356 // states for all the "next 3 codepoints are X" cases. |
| 357 // State-machine tokenizers are easier to write to handle | 357 // State-machine tokenizers are easier to write to handle |
| 358 // incremental tokenization of partial sources. | 358 // incremental tokenization of partial sources. |
| 359 // However, for now we follow the spec exactly. | 359 // However, for now we follow the spec exactly. |
| 360 UChar cc = consume(); | 360 UChar cc = consume(); |
| 361 CodePoint codePointFunc = 0; | |
| 362 | 361 |
| 363 if (isASCII(cc)) { | 362 switch (cc) { |
| 364 ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber); | 363 case 0: |
| 365 codePointFunc = codePoints[cc]; | 364 return endOfFile(cc); |
| 366 } else { | 365 case 1: |
| 367 codePointFunc = &CSSTokenizer::nameStart; | 366 case 2: |
| | 367 case 3: |
| | 368 case 4: |
| | 369 case 5: |
| | 370 case 6: |
| | 371 case 7: |
| | 372 case 8: |
| | 373 return CSSParserToken(DelimiterToken, cc); |
| | **esprehn** (2016/04/22 23:30:58): Skip the virtual call overhead of the lookup table… |
| | **Timothy Loh** (2016/04/27 06:52:05): This is definitely an improvement (I think using c… |
| | 374 case 9: |
| | 375 case 10: |
| | 376 return whiteSpace(cc); |
| | 377 case 11: |
| | 378 return CSSParserToken(DelimiterToken, cc); |
| | 379 case 12: |
| | 380 case 13: |
| | 381 return whiteSpace(cc); |
| | 382 case 14: |
| | 383 case 15: |
| | 384 case 16: |
| | 385 case 17: |
| | 386 case 18: |
| | 387 case 19: |
| | 388 case 20: |
| | 389 case 21: |
| | 390 case 22: |
| | 391 case 23: |
| | 392 case 24: |
| | 393 case 25: |
| | 394 case 26: |
| | 395 case 27: |
| | 396 case 28: |
| | 397 case 29: |
| | 398 case 30: |
| | 399 case 31: |
| | 400 return CSSParserToken(DelimiterToken, cc); |
| | 401 case 32: |
| | 402 return whiteSpace(cc); |
| | 403 case 33: |
| | 404 return CSSParserToken(DelimiterToken, cc); |
| | 405 case 34: |
| | 406 return stringStart(cc); |
| | 407 case 35: |
| | 408 return hash(cc); |
| | 409 case 36: |
| | 410 return dollarSign(cc); |
| | 411 case 37: |
| | 412 case 38: |
| | 413 return CSSParserToken(DelimiterToken, cc); |
| | 414 case 39: |
| | 415 return stringStart(cc); |
| | 416 case 40: |
| | 417 return leftParenthesis(cc); |
| | 418 case 41: |
| | 419 return rightParenthesis(cc); |
| | 420 case 42: |
| | 421 return asterisk(cc); |
| | 422 case 43: |
| | 423 return plusOrFullStop(cc); |
| | 424 case 44: |
| | 425 return comma(cc); |
| | 426 case 45: |
| | 427 return hyphenMinus(cc); |
| | 428 case 46: |
| | 429 return plusOrFullStop(cc); |
| | 430 case 47: |
| | 431 return solidus(cc); |
| | 432 case 48: |
| | 433 case 49: |
| | 434 case 50: |
| | 435 case 51: |
| | 436 case 52: |
| | 437 case 53: |
| | 438 case 54: |
| | 439 case 55: |
| | 440 case 56: |
| | 441 case 57: |
| | 442 return asciiDigit(cc); |
| | 443 case 58: |
| | 444 return colon(cc); |
| | 445 case 59: |
| | 446 return semiColon(cc); |
| | 447 case 60: |
| | 448 return lessThan(cc); |
| | 449 case 61: |
| | 450 case 62: |
| | 451 case 63: |
| | 452 return CSSParserToken(DelimiterToken, cc); |
| | 453 case 64: |
| | 454 return commercialAt(cc); |
| | 455 case 65: |
| | 456 case 66: |
| | 457 case 67: |
| | 458 case 68: |
| | 459 case 69: |
| | 460 case 70: |
| | 461 case 71: |
| | 462 case 72: |
| | 463 case 73: |
| | 464 case 74: |
| | 465 case 75: |
| | 466 case 76: |
| | 467 case 77: |
| | 468 case 78: |
| | 469 case 79: |
| | 470 case 80: |
| | 471 case 81: |
| | 472 case 82: |
| | 473 case 83: |
| | 474 case 84: |
| | 475 return nameStart(cc); |
| | 476 case 85: |
| | 477 return letterU(cc); |
| | 478 case 86: |
| | 479 case 87: |
| | 480 case 88: |
| | 481 case 89: |
| | 482 case 90: |
| | 483 return nameStart(cc); |
| | 484 case 91: |
| | 485 return leftBracket(cc); |
| | 486 case 92: |
| | 487 return reverseSolidus(cc); |
| | 488 case 93: |
| | 489 return rightBracket(cc); |
| | 490 case 94: |
| | 491 return circumflexAccent(cc); |
| | 492 case 95: |
| | 493 return nameStart(cc); |
| | 494 case 96: |
| | 495 return CSSParserToken(DelimiterToken, cc); |
| | 496 case 97: |
| | 497 case 98: |
| | 498 case 99: |
| | 499 case 100: |
| | 500 case 101: |
| | 501 case 102: |
| | 502 case 103: |
| | 503 case 104: |
| | 504 case 105: |
| | 505 case 106: |
| | 506 case 107: |
| | 507 case 108: |
| | 508 case 109: |
| | 509 case 110: |
| | 510 case 111: |
| | 511 case 112: |
| | 512 case 113: |
| | 513 case 114: |
| | 514 case 115: |
| | 515 case 116: |
| | 516 return nameStart(cc); |
| | 517 case 117: |
| | 518 return letterU(cc); |
| | 519 case 118: |
| | 520 case 119: |
| | 521 case 120: |
| | 522 case 121: |
| | 523 case 122: |
| | 524 return nameStart(cc); |
| | 525 case 123: |
| | 526 return leftBrace(cc); |
| | 527 case 124: |
| | 528 return verticalLine(cc); |
| | 529 case 125: |
| | 530 return rightBrace(cc); |
| | 531 case 126: |
| | 532 return tilde(cc); |
| | 533 case 127: |
| | 534 return CSSParserToken(DelimiterToken, cc); |
| 368 } | 535 } |
| 369 | 536 |
| 370 if (codePointFunc) | 537 ASSERT(!isASCII(cc)); |
| 371 return ((this)->*(codePointFunc))(cc); | 538 return nameStart(cc); |
| 372 return CSSParserToken(DelimiterToken, cc); | |
| 373 } | 539 } |
| 374 | 540 |
| 375 static NumericSign getSign(CSSTokenizerInputStream& input, unsigned& offset) | 541 static NumericSign getSign(CSSTokenizerInputStream& input, unsigned& offset) |
| 376 { | 542 { |
| 377 if (input.nextInputChar() == '+') { | 543 if (input.nextInputChar() == '+') { |
| 378 ++offset; | 544 ++offset; |
| 379 return PlusSign; | 545 return PlusSign; |
| 380 } | 546 } |
| 381 if (input.nextInputChar() == '-') { | 547 if (input.nextInputChar() == '-') { |
| 382 ++offset; | 548 ++offset; |
| (...skipping 229 matching lines...) | |
| 612 { | 778 { |
| 613 while (true) { | 779 while (true) { |
| 614 UChar cc = consume(); | 780 UChar cc = consume(); |
| 615 if (cc == ')' || cc == kEndOfFileMarker) | 781 if (cc == ')' || cc == kEndOfFileMarker) |
| 616 return; | 782 return; |
| 617 if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) | 783 if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) |
| 618 consumeEscape(); | 784 consumeEscape(); |
| 619 } | 785 } |
| 620 } | 786 } |
| 621 | 787 |
| 622 void CSSTokenizer::consumeUntilNonWhitespace() | 788 ALWAYS_INLINE void CSSTokenizer::consumeUntilNonWhitespace() |
| 623 { | 789 { |
| 624 // Using HTML space here rather than CSS space since we don't do preprocessing | 790 // Using HTML space here rather than CSS space since we don't do preprocessing |
| 625 while (isHTMLSpace<UChar>(m_input.nextInputChar())) | 791 while (isHTMLSpace<UChar>(m_input.nextInputChar())) |
| 626 consume(); | 792 consume(); |
| 627 } | 793 } |
| 628 | 794 |
| 629 void CSSTokenizer::consumeSingleWhitespaceIfNext() | 795 void CSSTokenizer::consumeSingleWhitespaceIfNext() |
| 630 { | 796 { |
| 631 // We check for \r\n and HTML spaces since we don't do preprocessing | 797 // We check for \r\n and HTML spaces since we don't do preprocessing |
| 632 UChar c = m_input.nextInputChar(); | 798 UChar c = m_input.nextInputChar(); |
| (...skipping 109 matching lines...) | |
| 742 { | 908 { |
| 743 UChar first = consume(); | 909 UChar first = consume(); |
| 744 bool areNumber = nextCharsAreNumber(first); | 910 bool areNumber = nextCharsAreNumber(first); |
| 745 reconsume(first); | 911 reconsume(first); |
| 746 return areNumber; | 912 return areNumber; |
| 747 } | 913 } |
| 748 | 914 |
| 749 // http://dev.w3.org/csswg/css-syntax/#would-start-an-identifier | 915 // http://dev.w3.org/csswg/css-syntax/#would-start-an-identifier |
| 750 bool CSSTokenizer::nextCharsAreIdentifier(UChar first) | 916 bool CSSTokenizer::nextCharsAreIdentifier(UChar first) |
| 751 { | 917 { |
| | 918 if (isNameStartCodePoint(first)) |
| | 919 return true; |
| | 920 |
| 752 UChar second = m_input.nextInputChar(); | 921 UChar second = m_input.nextInputChar(); |
| 753 if (isNameStartCodePoint(first) || twoCharsAreValidEscape(first, second)) | 922 if (twoCharsAreValidEscape(first, second)) |
| 754 return true; | 923 return true; |
| 755 | 924 |
| 756 if (first == '-') | 925 if (first == '-') |
| 757 return isNameStartCodePoint(second) || second == '-' || nextTwoCharsAreValidEscape(); | 926 return isNameStartCodePoint(second) || second == '-' || nextTwoCharsAreValidEscape(); |
| 758 | 927 |
| 759 return false; | 928 return false; |
| 760 } | 929 } |
| 761 | 930 |
| 762 bool CSSTokenizer::nextCharsAreIdentifier() | 931 bool CSSTokenizer::nextCharsAreIdentifier() |
| 763 { | 932 { |
| 764 UChar first = consume(); | 933 UChar first = consume(); |
| 765 bool areIdentifier = nextCharsAreIdentifier(first); | 934 bool areIdentifier = nextCharsAreIdentifier(first); |
| 766 reconsume(first); | 935 reconsume(first); |
| 767 return areIdentifier; | 936 return areIdentifier; |
| 768 } | 937 } |
| 769 | 938 |
| 770 CSSParserString CSSTokenizer::registerString(const String& string) | 939 CSSParserString CSSTokenizer::registerString(const String& string) |
| 771 { | 940 { |
| 772 m_scope.storeString(string); | 941 m_scope.storeString(string); |
| 773 CSSParserString result; | 942 CSSParserString result; |
| 774 result.init(string); | 943 result.init(string); |
| 775 return result; | 944 return result; |
| 776 } | 945 } |
| 777 | 946 |
| 778 } // namespace blink | 947 } // namespace blink |
| OLD | NEW |