third_party/WebKit/Source/core/css/parser/CSSTokenizer.cpp - Issue 1920583002: NOT FOR LANDING: Hack up CSSParser for speed.

Side by Side Diff: third_party/WebKit/Source/core/css/parser/CSSTokenizer.cpp

Issue 1920583002: NOT FOR LANDING: Hack up CSSParser for speed. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: missing consts. Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« third_party/WebKit/Source/core/css/parser/CSSTokenizer.h ('K') | « third_party/WebKit/Source/core/css/parser/CSSTokenizer.h ('k') | third_party/WebKit/Source/core/css/parser/CSSTokenizerInputStream.h » ('j') | third_party/WebKit/Source/core/css/parser/CSSTokenizerInputStream.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved.	1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "core/css/parser/CSSTokenizer.h"	5 #include "core/css/parser/CSSTokenizer.h"

6	6

7 namespace blink {	7 namespace blink {

8 #include "core/CSSTokenizerCodepoints.cpp"	8 #include "core/CSSTokenizerCodepoints.cpp"

9 }	9 }

10	10

11 #include "core/css/parser/CSSParserIdioms.h"	11 #include "core/css/parser/CSSParserIdioms.h"

12 #include "core/css/parser/CSSParserObserverWrapper.h"	12 #include "core/css/parser/CSSParserObserverWrapper.h"

13 #include "core/css/parser/CSSParserTokenRange.h"	13 #include "core/css/parser/CSSParserTokenRange.h"

14 #include "core/css/parser/CSSTokenizerInputStream.h"	14 #include "core/css/parser/CSSTokenizerInputStream.h"

15 #include "core/html/parser/HTMLParserIdioms.h"	15 #include "core/html/parser/HTMLParserIdioms.h"

16 #include "wtf/text/CharacterNames.h"	16 #include "wtf/text/CharacterNames.h"

17	17

18 namespace blink {	18 namespace blink {

19	19

20 CSSTokenizer::Scope::Scope(const String& string)	20 CSSTokenizer::Scope::Scope(const String& string)

21 : m_string(string)	21 : m_string(string)

22 {	22 {

23 // According to the spec, we should perform preprocessing here.	23 // According to the spec, we should perform preprocessing here.

24 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing	24 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing

25 //	25 //

26 // However, we can skip this step since:	26 // However, we can skip this step since:

27 // * We're using HTML spaces (which accept \r and \f as a valid white space)	27 // * We're using HTML spaces (which accept \r and \f as a valid white space)

28 // * Do not count white spaces	28 // * Do not count white spaces

29 // * CSSTokenizerInputStream::peek replaces NULLs for replacement characters	29 // * CSSTokenizerInputStream::peek replaces NULLs for replacement characters

30	30

31 if (string.isEmpty())	31 if (string.isEmpty())

32 return;	32 return;

33	33

34 // To avoid resizing we err on the side of reserving too much space.	34 // To avoid resizing we err on the side of reserving too much space.

35 // Most strings we tokenize have about 3.5 to 5 characters per token.	35 // Most strings we tokenize have about 3.5 to 5 characters per token.

36 m_tokens.reserveInitialCapacity(string.length() / 3);	36 m_tokens.reserveInitialCapacity(string.length() / 3);

37	37

38 CSSTokenizerInputStream input(string);	38 CSSTokenizerInputStream input(string);

39 CSSTokenizer tokenizer(input, *this);	39 CSSTokenizer tokenizer(input, *this);

40 while (true) {	40 while (true) {

41 CSSParserToken token = tokenizer.nextToken();	41 CSSParserToken token = tokenizer.nextToken();

42 if (token.type() == CommentToken)	42 if (token.type() == CommentToken)

43 continue;	43 continue;

44 if (token.type() == EOFToken)	44 if (token.type() == EOFToken)

45 return;	45 return;

46 m_tokens.append(token);	46 m_tokens.append(token);

47 }	47 }

48 }	48 }

49	49

50 CSSTokenizer::Scope::Scope(const String& string, CSSParserObserverWrapper& wrapper)	50 CSSTokenizer::Scope::Scope(const String& string, CSSParserObserverWrapper& wrapper)

51 : m_string(string)	51 : m_string(string)

52 {	52 {

53 if (string.isEmpty())	53 if (string.isEmpty())

54 return;	54 return;

55	55

56 CSSTokenizerInputStream input(string);	56 CSSTokenizerInputStream input(string);

57 CSSTokenizer tokenizer(input, *this);	57 CSSTokenizer tokenizer(input, *this);

58	58

59 unsigned offset = 0;	59 unsigned offset = 0;

60 while (true) {	60 while (true) {

61 CSSParserToken token = tokenizer.nextToken();	61 CSSParserToken token = tokenizer.nextToken();

62 if (token.type() == EOFToken)	62 if (token.type() == EOFToken)

63 break;	63 break;

64 if (token.type() == CommentToken) {	64 if (token.type() == CommentToken) {

65 wrapper.addComment(offset, input.offset(), m_tokens.size());	65 wrapper.addComment(offset, input.offset(), m_tokens.size());

66 } else {	66 } else {

67 m_tokens.append(token);	67 m_tokens.append(token);

68 wrapper.addToken(offset);	68 wrapper.addToken(offset);

69 }	69 }

70 offset = input.offset();	70 offset = input.offset();

71 }	71 }

72	72

73 wrapper.addToken(offset);	73 wrapper.addToken(offset);

74 wrapper.finalizeConstruction(m_tokens.begin());	74 wrapper.finalizeConstruction(m_tokens.begin());

75 }	75 }

76	76

77 CSSParserTokenRange CSSTokenizer::Scope::tokenRange()	77 CSSParserTokenRange CSSTokenizer::Scope::tokenRange()

78 {	78 {

79 return m_tokens;	79 return CSSParserTokenRange(m_tokens.begin(), m_tokens.end());

80 }	80 }

81	81

82 unsigned CSSTokenizer::Scope::tokenCount()	82 unsigned CSSTokenizer::Scope::tokenCount()

83 {	83 {

84 return m_tokens.size();	84 return m_tokens.size();

85 }	85 }

86	86

87 static bool isNewLine(UChar cc)	87 static bool isNewLine(UChar cc)

88 {	88 {

89 // We check \r and \f here, since we have no preprocessing stage	89 // We check \r and \f here, since we have no preprocessing stage

(...skipping 28 matching lines...) Expand all Loading...
118 {	118 {

119 m_input.advance(offset);	119 m_input.advance(offset);

120 }	120 }

121	121

122 CSSParserToken CSSTokenizer::whiteSpace(UChar cc)	122 CSSParserToken CSSTokenizer::whiteSpace(UChar cc)

123 {	123 {

124 consumeUntilNonWhitespace();	124 consumeUntilNonWhitespace();

125 return CSSParserToken(WhitespaceToken);	125 return CSSParserToken(WhitespaceToken);

126 }	126 }

127	127

128 static bool popIfBlockMatches(Vector<CSSParserTokenType>& blockStack, CSSParserTokenType type)	128 static bool popIfBlockMatches(Vector<CSSParserTokenType, 8>& blockStack, CSSParserTokenType type)

129 {	129 {

130 if (!blockStack.isEmpty() && blockStack.last() == type) {	130 if (!blockStack.isEmpty() && blockStack.last() == type) {

131 blockStack.removeLast();	131 blockStack.removeLast();

132 return true;	132 return true;

133 }	133 }

134 return false;	134 return false;

135 }	135 }

136	136

137 CSSParserToken CSSTokenizer::blockStart(CSSParserTokenType type)	137 CSSParserToken CSSTokenizer::blockStart(CSSParserTokenType type)

138 {	138 {

(...skipping 212 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
351 CSSParserToken CSSTokenizer::nextToken()	351 CSSParserToken CSSTokenizer::nextToken()

352 {	352 {

353 // Unlike the HTMLTokenizer, the CSS Syntax spec is written	353 // Unlike the HTMLTokenizer, the CSS Syntax spec is written

354 // as a stateless, (fixed-size) look-ahead tokenizer.	354 // as a stateless, (fixed-size) look-ahead tokenizer.

355 // We could move to the stateful model and instead create	355 // We could move to the stateful model and instead create

356 // states for all the "next 3 codepoints are X" cases.	356 // states for all the "next 3 codepoints are X" cases.

357 // State-machine tokenizers are easier to write to handle	357 // State-machine tokenizers are easier to write to handle

358 // incremental tokenization of partial sources.	358 // incremental tokenization of partial sources.

359 // However, for now we follow the spec exactly.	359 // However, for now we follow the spec exactly.

360 UChar cc = consume();	360 UChar cc = consume();

361 CodePoint codePointFunc = 0;

362	361

363 if (isASCII(cc)) {	362 switch (cc) {

364 ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber);	363 case 0:

365 codePointFunc = codePoints[cc];	364 return endOfFile(cc);

366 } else {	365 case 1:

367 codePointFunc = &CSSTokenizer::nameStart;	366 case 2:

	367 case 3:

	368 case 4:

	369 case 5:

	370 case 6:

	371 case 7:

	372 case 8:

	373 return CSSParserToken(DelimiterToken, cc);
	esprehn 2016/04/22 23:30:58: Skip the virtual call overhead of the lookup table. This also means we don't need to pass arguments into the functions like comma() which don't need it.
	Timothy Loh 2016/04/27 06:52:05 (in reply to esprehn's comment of 2016/04/22 23:30:58): This is definitely an improvement (I think using code generation to make the lookup table is just unneeded complexity), but I'm not convinced this will actually make any performance difference.
	374 case 9:

	375 case 10:

	376 return whiteSpace(cc);

	377 case 11:

	378 return CSSParserToken(DelimiterToken, cc);

	379 case 12:

	380 case 13:

	381 return whiteSpace(cc);

	382 case 14:

	383 case 15:

	384 case 16:

	385 case 17:

	386 case 18:

	387 case 19:

	388 case 20:

	389 case 21:

	390 case 22:

	391 case 23:

	392 case 24:

	393 case 25:

	394 case 26:

	395 case 27:

	396 case 28:

	397 case 29:

	398 case 30:

	399 case 31:

	400 return CSSParserToken(DelimiterToken, cc);

	401 case 32:

	402 return whiteSpace(cc);

	403 case 33:

	404 return CSSParserToken(DelimiterToken, cc);

	405 case 34:

	406 return stringStart(cc);

	407 case 35:

	408 return hash(cc);

	409 case 36:

	410 return dollarSign(cc);

	411 case 37:

	412 case 38:

	413 return CSSParserToken(DelimiterToken, cc);

	414 case 39:

	415 return stringStart(cc);

	416 case 40:

	417 return leftParenthesis(cc);

	418 case 41:

	419 return rightParenthesis(cc);

	420 case 42:

	421 return asterisk(cc);

	422 case 43:

	423 return plusOrFullStop(cc);

	424 case 44:

	425 return comma(cc);

	426 case 45:

	427 return hyphenMinus(cc);

	428 case 46:

	429 return plusOrFullStop(cc);

	430 case 47:

	431 return solidus(cc);

	432 case 48:

	433 case 49:

	434 case 50:

	435 case 51:

	436 case 52:

	437 case 53:

	438 case 54:

	439 case 55:

	440 case 56:

	441 case 57:

	442 return asciiDigit(cc);

	443 case 58:

	444 return colon(cc);

	445 case 59:

	446 return semiColon(cc);

	447 case 60:

	448 return lessThan(cc);

	449 case 61:

	450 case 62:

	451 case 63:

	452 return CSSParserToken(DelimiterToken, cc);

	453 case 64:

	454 return commercialAt(cc);

	455 case 65:

	456 case 66:

	457 case 67:

	458 case 68:

	459 case 69:

	460 case 70:

	461 case 71:

	462 case 72:

	463 case 73:

	464 case 74:

	465 case 75:

	466 case 76:

	467 case 77:

	468 case 78:

	469 case 79:

	470 case 80:

	471 case 81:

	472 case 82:

	473 case 83:

	474 case 84:

	475 return nameStart(cc);

	476 case 85:

	477 return letterU(cc);

	478 case 86:

	479 case 87:

	480 case 88:

	481 case 89:

	482 case 90:

	483 return nameStart(cc);

	484 case 91:

	485 return leftBracket(cc);

	486 case 92:

	487 return reverseSolidus(cc);

	488 case 93:

	489 return rightBracket(cc);

	490 case 94:

	491 return circumflexAccent(cc);

	492 case 95:

	493 return nameStart(cc);

	494 case 96:

	495 return CSSParserToken(DelimiterToken, cc);

	496 case 97:

	497 case 98:

	498 case 99:

	499 case 100:

	500 case 101:

	501 case 102:

	502 case 103:

	503 case 104:

	504 case 105:

	505 case 106:

	506 case 107:

	507 case 108:

	508 case 109:

	509 case 110:

	510 case 111:

	511 case 112:

	512 case 113:

	513 case 114:

	514 case 115:

	515 case 116:

	516 return nameStart(cc);

	517 case 117:

	518 return letterU(cc);

	519 case 118:

	520 case 119:

	521 case 120:

	522 case 121:

	523 case 122:

	524 return nameStart(cc);

	525 case 123:

	526 return leftBrace(cc);

	527 case 124:

	528 return verticalLine(cc);

	529 case 125:

	530 return rightBrace(cc);

	531 case 126:

	532 return tilde(cc);

	533 case 127:

	534 return CSSParserToken(DelimiterToken, cc);

368 }	535 }

369	536

370 if (codePointFunc)	537 ASSERT(!isASCII(cc));

371 return ((this)->*(codePointFunc))(cc);	538 return nameStart(cc);

372 return CSSParserToken(DelimiterToken, cc);

373 }	539 }

374	540

375 static NumericSign getSign(CSSTokenizerInputStream& input, unsigned& offset)	541 static NumericSign getSign(CSSTokenizerInputStream& input, unsigned& offset)

376 {	542 {

377 if (input.nextInputChar() == '+') {	543 if (input.nextInputChar() == '+') {

378 ++offset;	544 ++offset;

379 return PlusSign;	545 return PlusSign;

380 }	546 }

381 if (input.nextInputChar() == '-') {	547 if (input.nextInputChar() == '-') {

382 ++offset;	548 ++offset;

(...skipping 229 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
612 {	778 {

613 while (true) {	779 while (true) {

614 UChar cc = consume();	780 UChar cc = consume();

615 if (cc == ')' || cc == kEndOfFileMarker)	781 if (cc == ')' || cc == kEndOfFileMarker)

616 return;	782 return;

617 if (twoCharsAreValidEscape(cc, m_input.nextInputChar()))	783 if (twoCharsAreValidEscape(cc, m_input.nextInputChar()))

618 consumeEscape();	784 consumeEscape();

619 }	785 }

620 }	786 }

621	787

622 void CSSTokenizer::consumeUntilNonWhitespace()	788 ALWAYS_INLINE void CSSTokenizer::consumeUntilNonWhitespace()

623 {	789 {

624 // Using HTML space here rather than CSS space since we don't do preprocessing	790 // Using HTML space here rather than CSS space since we don't do preprocessing

625 while (isHTMLSpace<UChar>(m_input.nextInputChar()))	791 while (isHTMLSpace<UChar>(m_input.nextInputChar()))

626 consume();	792 consume();

627 }	793 }

628	794

629 void CSSTokenizer::consumeSingleWhitespaceIfNext()	795 void CSSTokenizer::consumeSingleWhitespaceIfNext()

630 {	796 {

631 // We check for \r\n and HTML spaces since we don't do preprocessing	797 // We check for \r\n and HTML spaces since we don't do preprocessing

632 UChar c = m_input.nextInputChar();	798 UChar c = m_input.nextInputChar();

(...skipping 109 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
742 {	908 {

743 UChar first = consume();	909 UChar first = consume();

744 bool areNumber = nextCharsAreNumber(first);	910 bool areNumber = nextCharsAreNumber(first);

745 reconsume(first);	911 reconsume(first);

746 return areNumber;	912 return areNumber;

747 }	913 }

748	914

749 // http://dev.w3.org/csswg/css-syntax/#would-start-an-identifier	915 // http://dev.w3.org/csswg/css-syntax/#would-start-an-identifier

750 bool CSSTokenizer::nextCharsAreIdentifier(UChar first)	916 bool CSSTokenizer::nextCharsAreIdentifier(UChar first)

751 {	917 {

	918 if (isNameStartCodePoint(first))

	919 return true;

	920

752 UChar second = m_input.nextInputChar();	921 UChar second = m_input.nextInputChar();

753 if (isNameStartCodePoint(first) || twoCharsAreValidEscape(first, second))	922 if (twoCharsAreValidEscape(first, second))

754 return true;	923 return true;

755	924

756 if (first == '-')	925 if (first == '-')

757 return isNameStartCodePoint(second) || second == '-' || nextTwoCharsAreValidEscape();	926 return isNameStartCodePoint(second) || second == '-' || nextTwoCharsAreValidEscape();

758	927

759 return false;	928 return false;

760 }	929 }

761	930

762 bool CSSTokenizer::nextCharsAreIdentifier()	931 bool CSSTokenizer::nextCharsAreIdentifier()

763 {	932 {

764 UChar first = consume();	933 UChar first = consume();

765 bool areIdentifier = nextCharsAreIdentifier(first);	934 bool areIdentifier = nextCharsAreIdentifier(first);

766 reconsume(first);	935 reconsume(first);

767 return areIdentifier;	936 return areIdentifier;

768 }	937 }

769	938

770 CSSParserString CSSTokenizer::registerString(const String& string)	939 CSSParserString CSSTokenizer::registerString(const String& string)

771 {	940 {

772 m_scope.storeString(string);	941 m_scope.storeString(string);

773 CSSParserString result;	942 CSSParserString result;

774 result.init(string);	943 result.init(string);

775 return result;	944 return result;

776 }	945 }

777	946

778 } // namespace blink	947 } // namespace blink

OLD	NEW