Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(140)

Side by Side Diff: third_party/WebKit/Source/core/css/parser/CSSTokenizer.cpp

Issue 1920583002: NOT FOR LANDING: Hack up CSSParser for speed. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: missing consts. Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "core/css/parser/CSSTokenizer.h" 5 #include "core/css/parser/CSSTokenizer.h"
6 6
7 namespace blink { 7 namespace blink {
8 #include "core/CSSTokenizerCodepoints.cpp" 8 #include "core/CSSTokenizerCodepoints.cpp"
9 } 9 }
10 10
11 #include "core/css/parser/CSSParserIdioms.h" 11 #include "core/css/parser/CSSParserIdioms.h"
12 #include "core/css/parser/CSSParserObserverWrapper.h" 12 #include "core/css/parser/CSSParserObserverWrapper.h"
13 #include "core/css/parser/CSSParserTokenRange.h" 13 #include "core/css/parser/CSSParserTokenRange.h"
14 #include "core/css/parser/CSSTokenizerInputStream.h" 14 #include "core/css/parser/CSSTokenizerInputStream.h"
15 #include "core/html/parser/HTMLParserIdioms.h" 15 #include "core/html/parser/HTMLParserIdioms.h"
16 #include "wtf/text/CharacterNames.h" 16 #include "wtf/text/CharacterNames.h"
17 17
18 namespace blink { 18 namespace blink {
19 19
20 CSSTokenizer::Scope::Scope(const String& string) 20 CSSTokenizer::Scope::Scope(const String& string)
21 : m_string(string) 21 : m_string(string)
22 { 22 {
23 // According to the spec, we should perform preprocessing here. 23 // According to the spec, we should perform preprocessing here.
24 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing 24 // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing
25 // 25 //
26 // However, we can skip this step since: 26 // However, we can skip this step since:
27 // * We're using HTML spaces (which accept \r and \f as a valid white space) 27 // * We're using HTML spaces (which accept \r and \f as a valid white space)
28 // * Do not count white spaces 28 // * Do not count white spaces
29 // * CSSTokenizerInputStream::peek replaces NULLs for replacement characters 29 // * CSSTokenizerInputStream::peek replaces NULLs for replacement characters
30 30
31 if (string.isEmpty()) 31 if (string.isEmpty())
32 return; 32 return;
33 33
34 // To avoid resizing we err on the side of reserving too much space. 34 // To avoid resizing we err on the side of reserving too much space.
35 // Most strings we tokenize have about 3.5 to 5 characters per token. 35 // Most strings we tokenize have about 3.5 to 5 characters per token.
36 m_tokens.reserveInitialCapacity(string.length() / 3); 36 m_tokens.reserveInitialCapacity(string.length() / 3);
37 37
38 CSSTokenizerInputStream input(string); 38 CSSTokenizerInputStream input(string);
39 CSSTokenizer tokenizer(input, *this); 39 CSSTokenizer tokenizer(input, *this);
40 while (true) { 40 while (true) {
41 CSSParserToken token = tokenizer.nextToken(); 41 CSSParserToken token = tokenizer.nextToken();
42 if (token.type() == CommentToken) 42 if (token.type() == CommentToken)
43 continue; 43 continue;
44 if (token.type() == EOFToken) 44 if (token.type() == EOFToken)
45 return; 45 return;
46 m_tokens.append(token); 46 m_tokens.append(token);
47 } 47 }
48 } 48 }
49 49
50 CSSTokenizer::Scope::Scope(const String& string, CSSParserObserverWrapper& wrapper) 50 CSSTokenizer::Scope::Scope(const String& string, CSSParserObserverWrapper& wrapper)
51 : m_string(string) 51 : m_string(string)
52 { 52 {
53 if (string.isEmpty()) 53 if (string.isEmpty())
54 return; 54 return;
55 55
56 CSSTokenizerInputStream input(string); 56 CSSTokenizerInputStream input(string);
57 CSSTokenizer tokenizer(input, *this); 57 CSSTokenizer tokenizer(input, *this);
58 58
59 unsigned offset = 0; 59 unsigned offset = 0;
60 while (true) { 60 while (true) {
61 CSSParserToken token = tokenizer.nextToken(); 61 CSSParserToken token = tokenizer.nextToken();
62 if (token.type() == EOFToken) 62 if (token.type() == EOFToken)
63 break; 63 break;
64 if (token.type() == CommentToken) { 64 if (token.type() == CommentToken) {
65 wrapper.addComment(offset, input.offset(), m_tokens.size()); 65 wrapper.addComment(offset, input.offset(), m_tokens.size());
66 } else { 66 } else {
67 m_tokens.append(token); 67 m_tokens.append(token);
68 wrapper.addToken(offset); 68 wrapper.addToken(offset);
69 } 69 }
70 offset = input.offset(); 70 offset = input.offset();
71 } 71 }
72 72
73 wrapper.addToken(offset); 73 wrapper.addToken(offset);
74 wrapper.finalizeConstruction(m_tokens.begin()); 74 wrapper.finalizeConstruction(m_tokens.begin());
75 } 75 }
76 76
77 CSSParserTokenRange CSSTokenizer::Scope::tokenRange() 77 CSSParserTokenRange CSSTokenizer::Scope::tokenRange()
78 { 78 {
79 return m_tokens; 79 return CSSParserTokenRange(m_tokens.begin(), m_tokens.end());
80 } 80 }
81 81
82 unsigned CSSTokenizer::Scope::tokenCount() 82 unsigned CSSTokenizer::Scope::tokenCount()
83 { 83 {
84 return m_tokens.size(); 84 return m_tokens.size();
85 } 85 }
86 86
87 static bool isNewLine(UChar cc) 87 static bool isNewLine(UChar cc)
88 { 88 {
89 // We check \r and \f here, since we have no preprocessing stage 89 // We check \r and \f here, since we have no preprocessing stage
(...skipping 28 matching lines...) Expand all
118 { 118 {
119 m_input.advance(offset); 119 m_input.advance(offset);
120 } 120 }
121 121
122 CSSParserToken CSSTokenizer::whiteSpace(UChar cc) 122 CSSParserToken CSSTokenizer::whiteSpace(UChar cc)
123 { 123 {
124 consumeUntilNonWhitespace(); 124 consumeUntilNonWhitespace();
125 return CSSParserToken(WhitespaceToken); 125 return CSSParserToken(WhitespaceToken);
126 } 126 }
127 127
128 static bool popIfBlockMatches(Vector<CSSParserTokenType>& blockStack, CSSParserTokenType type) 128 static bool popIfBlockMatches(Vector<CSSParserTokenType, 8>& blockStack, CSSParserTokenType type)
129 { 129 {
130 if (!blockStack.isEmpty() && blockStack.last() == type) { 130 if (!blockStack.isEmpty() && blockStack.last() == type) {
131 blockStack.removeLast(); 131 blockStack.removeLast();
132 return true; 132 return true;
133 } 133 }
134 return false; 134 return false;
135 } 135 }
136 136
137 CSSParserToken CSSTokenizer::blockStart(CSSParserTokenType type) 137 CSSParserToken CSSTokenizer::blockStart(CSSParserTokenType type)
138 { 138 {
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after
351 CSSParserToken CSSTokenizer::nextToken() 351 CSSParserToken CSSTokenizer::nextToken()
352 { 352 {
353 // Unlike the HTMLTokenizer, the CSS Syntax spec is written 353 // Unlike the HTMLTokenizer, the CSS Syntax spec is written
354 // as a stateless, (fixed-size) look-ahead tokenizer. 354 // as a stateless, (fixed-size) look-ahead tokenizer.
355 // We could move to the stateful model and instead create 355 // We could move to the stateful model and instead create
356 // states for all the "next 3 codepoints are X" cases. 356 // states for all the "next 3 codepoints are X" cases.
357 // State-machine tokenizers are easier to write to handle 357 // State-machine tokenizers are easier to write to handle
358 // incremental tokenization of partial sources. 358 // incremental tokenization of partial sources.
359 // However, for now we follow the spec exactly. 359 // However, for now we follow the spec exactly.
360 UChar cc = consume(); 360 UChar cc = consume();
361 CodePoint codePointFunc = 0;
362 361
363 if (isASCII(cc)) { 362 switch (cc) {
364 ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber); 363 case 0:
365 codePointFunc = codePoints[cc]; 364 return endOfFile(cc);
366 } else { 365 case 1:
367 codePointFunc = &CSSTokenizer::nameStart; 366 case 2:
367 case 3:
368 case 4:
369 case 5:
370 case 6:
371 case 7:
372 case 8:
373 return CSSParserToken(DelimiterToken, cc);
esprehn 2016/04/22 23:30:58 Skip the virtual call overhead of the lookup table
Timothy Loh 2016/04/27 06:52:05 This is definitely an improvement (I think using c
374 case 9:
375 case 10:
376 return whiteSpace(cc);
377 case 11:
378 return CSSParserToken(DelimiterToken, cc);
379 case 12:
380 case 13:
381 return whiteSpace(cc);
382 case 14:
383 case 15:
384 case 16:
385 case 17:
386 case 18:
387 case 19:
388 case 20:
389 case 21:
390 case 22:
391 case 23:
392 case 24:
393 case 25:
394 case 26:
395 case 27:
396 case 28:
397 case 29:
398 case 30:
399 case 31:
400 return CSSParserToken(DelimiterToken, cc);
401 case 32:
402 return whiteSpace(cc);
403 case 33:
404 return CSSParserToken(DelimiterToken, cc);
405 case 34:
406 return stringStart(cc);
407 case 35:
408 return hash(cc);
409 case 36:
410 return dollarSign(cc);
411 case 37:
412 case 38:
413 return CSSParserToken(DelimiterToken, cc);
414 case 39:
415 return stringStart(cc);
416 case 40:
417 return leftParenthesis(cc);
418 case 41:
419 return rightParenthesis(cc);
420 case 42:
421 return asterisk(cc);
422 case 43:
423 return plusOrFullStop(cc);
424 case 44:
425 return comma(cc);
426 case 45:
427 return hyphenMinus(cc);
428 case 46:
429 return plusOrFullStop(cc);
430 case 47:
431 return solidus(cc);
432 case 48:
433 case 49:
434 case 50:
435 case 51:
436 case 52:
437 case 53:
438 case 54:
439 case 55:
440 case 56:
441 case 57:
442 return asciiDigit(cc);
443 case 58:
444 return colon(cc);
445 case 59:
446 return semiColon(cc);
447 case 60:
448 return lessThan(cc);
449 case 61:
450 case 62:
451 case 63:
452 return CSSParserToken(DelimiterToken, cc);
453 case 64:
454 return commercialAt(cc);
455 case 65:
456 case 66:
457 case 67:
458 case 68:
459 case 69:
460 case 70:
461 case 71:
462 case 72:
463 case 73:
464 case 74:
465 case 75:
466 case 76:
467 case 77:
468 case 78:
469 case 79:
470 case 80:
471 case 81:
472 case 82:
473 case 83:
474 case 84:
475 return nameStart(cc);
476 case 85:
477 return letterU(cc);
478 case 86:
479 case 87:
480 case 88:
481 case 89:
482 case 90:
483 return nameStart(cc);
484 case 91:
485 return leftBracket(cc);
486 case 92:
487 return reverseSolidus(cc);
488 case 93:
489 return rightBracket(cc);
490 case 94:
491 return circumflexAccent(cc);
492 case 95:
493 return nameStart(cc);
494 case 96:
495 return CSSParserToken(DelimiterToken, cc);
496 case 97:
497 case 98:
498 case 99:
499 case 100:
500 case 101:
501 case 102:
502 case 103:
503 case 104:
504 case 105:
505 case 106:
506 case 107:
507 case 108:
508 case 109:
509 case 110:
510 case 111:
511 case 112:
512 case 113:
513 case 114:
514 case 115:
515 case 116:
516 return nameStart(cc);
517 case 117:
518 return letterU(cc);
519 case 118:
520 case 119:
521 case 120:
522 case 121:
523 case 122:
524 return nameStart(cc);
525 case 123:
526 return leftBrace(cc);
527 case 124:
528 return verticalLine(cc);
529 case 125:
530 return rightBrace(cc);
531 case 126:
532 return tilde(cc);
533 case 127:
534 return CSSParserToken(DelimiterToken, cc);
368 } 535 }
369 536
370 if (codePointFunc) 537 ASSERT(!isASCII(cc));
371 return ((this)->*(codePointFunc))(cc); 538 return nameStart(cc);
372 return CSSParserToken(DelimiterToken, cc);
373 } 539 }
374 540
375 static NumericSign getSign(CSSTokenizerInputStream& input, unsigned& offset) 541 static NumericSign getSign(CSSTokenizerInputStream& input, unsigned& offset)
376 { 542 {
377 if (input.nextInputChar() == '+') { 543 if (input.nextInputChar() == '+') {
378 ++offset; 544 ++offset;
379 return PlusSign; 545 return PlusSign;
380 } 546 }
381 if (input.nextInputChar() == '-') { 547 if (input.nextInputChar() == '-') {
382 ++offset; 548 ++offset;
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after
612 { 778 {
613 while (true) { 779 while (true) {
614 UChar cc = consume(); 780 UChar cc = consume();
615 if (cc == ')' || cc == kEndOfFileMarker) 781 if (cc == ')' || cc == kEndOfFileMarker)
616 return; 782 return;
617 if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) 783 if (twoCharsAreValidEscape(cc, m_input.nextInputChar()))
618 consumeEscape(); 784 consumeEscape();
619 } 785 }
620 } 786 }
621 787
622 void CSSTokenizer::consumeUntilNonWhitespace() 788 ALWAYS_INLINE void CSSTokenizer::consumeUntilNonWhitespace()
623 { 789 {
624 // Using HTML space here rather than CSS space since we don't do preprocessing 790 // Using HTML space here rather than CSS space since we don't do preprocessing
625 while (isHTMLSpace<UChar>(m_input.nextInputChar())) 791 while (isHTMLSpace<UChar>(m_input.nextInputChar()))
626 consume(); 792 consume();
627 } 793 }
628 794
629 void CSSTokenizer::consumeSingleWhitespaceIfNext() 795 void CSSTokenizer::consumeSingleWhitespaceIfNext()
630 { 796 {
631 // We check for \r\n and HTML spaces since we don't do preprocessing 797 // We check for \r\n and HTML spaces since we don't do preprocessing
632 UChar c = m_input.nextInputChar(); 798 UChar c = m_input.nextInputChar();
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
742 { 908 {
743 UChar first = consume(); 909 UChar first = consume();
744 bool areNumber = nextCharsAreNumber(first); 910 bool areNumber = nextCharsAreNumber(first);
745 reconsume(first); 911 reconsume(first);
746 return areNumber; 912 return areNumber;
747 } 913 }
748 914
749 // http://dev.w3.org/csswg/css-syntax/#would-start-an-identifier 915 // http://dev.w3.org/csswg/css-syntax/#would-start-an-identifier
750 bool CSSTokenizer::nextCharsAreIdentifier(UChar first) 916 bool CSSTokenizer::nextCharsAreIdentifier(UChar first)
751 { 917 {
918 if (isNameStartCodePoint(first))
919 return true;
920
752 UChar second = m_input.nextInputChar(); 921 UChar second = m_input.nextInputChar();
753 if (isNameStartCodePoint(first) || twoCharsAreValidEscape(first, second)) 922 if (twoCharsAreValidEscape(first, second))
754 return true; 923 return true;
755 924
756 if (first == '-') 925 if (first == '-')
757 return isNameStartCodePoint(second) || second == '-' || nextTwoCharsAreValidEscape(); 926 return isNameStartCodePoint(second) || second == '-' || nextTwoCharsAreValidEscape();
758 927
759 return false; 928 return false;
760 } 929 }
761 930
762 bool CSSTokenizer::nextCharsAreIdentifier() 931 bool CSSTokenizer::nextCharsAreIdentifier()
763 { 932 {
764 UChar first = consume(); 933 UChar first = consume();
765 bool areIdentifier = nextCharsAreIdentifier(first); 934 bool areIdentifier = nextCharsAreIdentifier(first);
766 reconsume(first); 935 reconsume(first);
767 return areIdentifier; 936 return areIdentifier;
768 } 937 }
769 938
770 CSSParserString CSSTokenizer::registerString(const String& string) 939 CSSParserString CSSTokenizer::registerString(const String& string)
771 { 940 {
772 m_scope.storeString(string); 941 m_scope.storeString(string);
773 CSSParserString result; 942 CSSParserString result;
774 result.init(string); 943 result.init(string);
775 return result; 944 return result;
776 } 945 }
777 946
778 } // namespace blink 947 } // namespace blink
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698