Chromium Code Reviews| Index: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp |
| diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp |
| index 535e149180ceb34c2894146c34b5093f68f92c9c..ece8a514af4ad39b89b10f461c00e5023b9aa28a 100644 |
| --- a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp |
| +++ b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp |
| @@ -45,9 +45,12 @@ namespace blink { |
| using namespace HTMLNames; |
| static inline UChar ToLowerCase(UChar cc) { |
| - DCHECK(IsASCIIUpper(cc)); |
| - const int kLowerCaseOffset = 0x20; |
| - return cc + kLowerCaseOffset; |
| + DCHECK(IsASCIIAlpha(cc)); |
| + return cc | 0x20; |
| +} |
| + |
| +static inline UChar ToLowerCaseIfAlpha(UChar cc) { |
| + return cc | (IsASCIIUpper(cc) ? 0x20 : 0); |
| } |
| static inline bool VectorEqualsString(const Vector<LChar, 32>& vector, |
| @@ -235,16 +238,13 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kTagOpenState) { |
| - if (cc == '!') |
| + if (cc == '!') { |
|
Charlie Harrison
2017/04/19 00:03:38
Thanks for fixing up the style issues here. This c… [comment truncated in page extract]
|
| HTML_ADVANCE_TO(kMarkupDeclarationOpenState); |
| - else if (cc == '/') |
| + } else if (cc == '/') { |
| HTML_ADVANCE_TO(kEndTagOpenState); |
| - else if (IsASCIIUpper(cc)) { |
| + } else if (IsASCIIAlpha(cc)) { |
| token_->BeginStartTag(ToLowerCase(cc)); |
| HTML_ADVANCE_TO(kTagNameState); |
| - } else if (IsASCIILower(cc)) { |
| - token_->BeginStartTag(cc); |
| - HTML_ADVANCE_TO(kTagNameState); |
| } else if (cc == '?') { |
| ParseError(); |
| // The spec consumes the current character before switching |
| @@ -260,14 +260,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kEndTagOpenState) { |
| - if (IsASCIIUpper(cc)) { |
| + if (IsASCIIAlpha(cc)) { |
| token_->BeginEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| appropriate_end_tag_name_.Clear(); |
| HTML_ADVANCE_TO(kTagNameState); |
| - } else if (IsASCIILower(cc)) { |
| - token_->BeginEndTag(static_cast<LChar>(cc)); |
| - appropriate_end_tag_name_.Clear(); |
| - HTML_ADVANCE_TO(kTagNameState); |
| } else if (cc == '>') { |
| ParseError(); |
| HTML_ADVANCE_TO(kDataState); |
| @@ -284,20 +280,17 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kTagNameState) { |
| - if (IsTokenizerWhitespace(cc)) |
| + if (IsTokenizerWhitespace(cc)) { |
| HTML_ADVANCE_TO(kBeforeAttributeNameState); |
| - else if (cc == '/') |
| + } else if (cc == '/') { |
| HTML_ADVANCE_TO(kSelfClosingStartTagState); |
| - else if (cc == '>') |
| + } else if (cc == '>') { |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| - else if (IsASCIIUpper(cc)) { |
| - token_->AppendToName(ToLowerCase(cc)); |
| - HTML_ADVANCE_TO(kTagNameState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| } else { |
| - token_->AppendToName(cc); |
| + token_->AppendToName(ToLowerCaseIfAlpha(cc)); |
| HTML_ADVANCE_TO(kTagNameState); |
| } |
| } |
| @@ -316,14 +309,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kRCDATAEndTagOpenState) { |
| - if (IsASCIIUpper(cc)) { |
| + if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kRCDATAEndTagNameState); |
| - } else if (IsASCIILower(cc)) { |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| - AddToPossibleEndTag(static_cast<LChar>(cc)); |
| - HTML_ADVANCE_TO(kRCDATAEndTagNameState); |
| } else { |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| @@ -333,14 +322,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kRCDATAEndTagNameState) { |
| - if (IsASCIIUpper(cc)) { |
| + if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kRCDATAEndTagNameState); |
| - } else if (IsASCIILower(cc)) { |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| - AddToPossibleEndTag(static_cast<LChar>(cc)); |
| - HTML_ADVANCE_TO(kRCDATAEndTagNameState); |
| } else { |
| if (IsTokenizerWhitespace(cc)) { |
| if (IsAppropriateEndTag()) { |
| @@ -381,14 +366,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kRAWTEXTEndTagOpenState) { |
| - if (IsASCIIUpper(cc)) { |
| + if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kRAWTEXTEndTagNameState); |
| - } else if (IsASCIILower(cc)) { |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| - AddToPossibleEndTag(static_cast<LChar>(cc)); |
| - HTML_ADVANCE_TO(kRAWTEXTEndTagNameState); |
| } else { |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| @@ -398,14 +379,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kRAWTEXTEndTagNameState) { |
| - if (IsASCIIUpper(cc)) { |
| + if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kRAWTEXTEndTagNameState); |
| - } else if (IsASCIILower(cc)) { |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| - AddToPossibleEndTag(static_cast<LChar>(cc)); |
| - HTML_ADVANCE_TO(kRAWTEXTEndTagNameState); |
| } else { |
| if (IsTokenizerWhitespace(cc)) { |
| if (IsAppropriateEndTag()) { |
| @@ -450,14 +427,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kScriptDataEndTagOpenState) { |
| - if (IsASCIIUpper(cc)) { |
| + if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kScriptDataEndTagNameState); |
| - } else if (IsASCIILower(cc)) { |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| - AddToPossibleEndTag(static_cast<LChar>(cc)); |
| - HTML_ADVANCE_TO(kScriptDataEndTagNameState); |
| } else { |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| @@ -467,14 +440,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kScriptDataEndTagNameState) { |
| - if (IsASCIIUpper(cc)) { |
| + if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kScriptDataEndTagNameState); |
| - } else if (IsASCIILower(cc)) { |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| - AddToPossibleEndTag(static_cast<LChar>(cc)); |
| - HTML_ADVANCE_TO(kScriptDataEndTagNameState); |
| } else { |
| if (IsTokenizerWhitespace(cc)) { |
| if (IsAppropriateEndTag()) { |
| @@ -576,17 +545,11 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| temporary_buffer_.Clear(); |
| DCHECK(buffered_end_tag_name_.IsEmpty()); |
| HTML_ADVANCE_TO(kScriptDataEscapedEndTagOpenState); |
| - } else if (IsASCIIUpper(cc)) { |
| + } else if (IsASCIIAlpha(cc)) { |
| BufferCharacter('<'); |
| BufferCharacter(cc); |
| temporary_buffer_.Clear(); |
| - temporary_buffer_.push_back(ToLowerCase(cc)); |
| - HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState); |
| - } else if (IsASCIILower(cc)) { |
| - BufferCharacter('<'); |
| - BufferCharacter(cc); |
| - temporary_buffer_.Clear(); |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| + temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState); |
| } else { |
| BufferCharacter('<'); |
| @@ -596,14 +559,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kScriptDataEscapedEndTagOpenState) { |
| - if (IsASCIIUpper(cc)) { |
| + if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState); |
| - } else if (IsASCIILower(cc)) { |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| - AddToPossibleEndTag(static_cast<LChar>(cc)); |
| - HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState); |
| } else { |
| BufferCharacter('<'); |
| BufferCharacter('/'); |
| @@ -613,14 +572,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kScriptDataEscapedEndTagNameState) { |
| - if (IsASCIIUpper(cc)) { |
| + if (IsASCIIAlpha(cc)) { |
| temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState); |
| - } else if (IsASCIILower(cc)) { |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| - AddToPossibleEndTag(static_cast<LChar>(cc)); |
| - HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState); |
| } else { |
| if (IsTokenizerWhitespace(cc)) { |
| if (IsAppropriateEndTag()) { |
| @@ -655,13 +610,9 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| HTML_ADVANCE_TO(kScriptDataDoubleEscapedState); |
| else |
| HTML_ADVANCE_TO(kScriptDataEscapedState); |
| - } else if (IsASCIIUpper(cc)) { |
| + } else if (IsASCIIAlpha(cc)) { |
| BufferCharacter(cc); |
| - temporary_buffer_.push_back(ToLowerCase(cc)); |
| - HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState); |
| - } else if (IsASCIILower(cc)) { |
| - BufferCharacter(cc); |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| + temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState); |
| } else |
| HTML_RECONSUME_IN(kScriptDataEscapedState); |
| @@ -739,13 +690,9 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| HTML_ADVANCE_TO(kScriptDataEscapedState); |
| else |
| HTML_ADVANCE_TO(kScriptDataDoubleEscapedState); |
| - } else if (IsASCIIUpper(cc)) { |
| + } else if (IsASCIIAlpha(cc)) { |
| BufferCharacter(cc); |
| - temporary_buffer_.push_back(ToLowerCase(cc)); |
| - HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState); |
| - } else if (IsASCIILower(cc)) { |
| - BufferCharacter(cc); |
| - temporary_buffer_.push_back(static_cast<LChar>(cc)); |
| + temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc))); |
| HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState); |
| } else |
| HTML_RECONSUME_IN(kScriptDataDoubleEscapedState); |
| @@ -753,17 +700,12 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kBeforeAttributeNameState) { |
| - if (IsTokenizerWhitespace(cc)) |
| + if (IsTokenizerWhitespace(cc)) { |
| HTML_ADVANCE_TO(kBeforeAttributeNameState); |
| - else if (cc == '/') |
| + } else if (cc == '/') { |
| HTML_ADVANCE_TO(kSelfClosingStartTagState); |
| - else if (cc == '>') |
| + } else if (cc == '>') { |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| - else if (IsASCIIUpper(cc)) { |
| - token_->AddNewAttribute(); |
| - token_->BeginAttributeName(source.NumberOfCharactersConsumed()); |
| - token_->AppendToAttributeName(ToLowerCase(cc)); |
| - HTML_ADVANCE_TO(kAttributeNameState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| @@ -772,7 +714,7 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| ParseError(); |
| token_->AddNewAttribute(); |
| token_->BeginAttributeName(source.NumberOfCharactersConsumed()); |
| - token_->AppendToAttributeName(cc); |
| + token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc)); |
| HTML_ADVANCE_TO(kAttributeNameState); |
| } |
| } |
| @@ -791,9 +733,6 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| } else if (cc == '>') { |
| token_->EndAttributeName(source.NumberOfCharactersConsumed()); |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| - } else if (IsASCIIUpper(cc)) { |
| - token_->AppendToAttributeName(ToLowerCase(cc)); |
| - HTML_ADVANCE_TO(kAttributeNameState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->EndAttributeName(source.NumberOfCharactersConsumed()); |
| @@ -801,26 +740,21 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| } else { |
| if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') |
| ParseError(); |
| - token_->AppendToAttributeName(cc); |
| + token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc)); |
| HTML_ADVANCE_TO(kAttributeNameState); |
| } |
| } |
| END_STATE() |
| HTML_BEGIN_STATE(kAfterAttributeNameState) { |
| - if (IsTokenizerWhitespace(cc)) |
| + if (IsTokenizerWhitespace(cc)) { |
| HTML_ADVANCE_TO(kAfterAttributeNameState); |
| - else if (cc == '/') |
| + } else if (cc == '/') { |
| HTML_ADVANCE_TO(kSelfClosingStartTagState); |
| - else if (cc == '=') |
| + } else if (cc == '=') { |
| HTML_ADVANCE_TO(kBeforeAttributeValueState); |
| - else if (cc == '>') |
| + } else if (cc == '>') { |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| - else if (IsASCIIUpper(cc)) { |
| - token_->AddNewAttribute(); |
| - token_->BeginAttributeName(source.NumberOfCharactersConsumed()); |
| - token_->AppendToAttributeName(ToLowerCase(cc)); |
| - HTML_ADVANCE_TO(kAttributeNameState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| HTML_RECONSUME_IN(kDataState); |
| @@ -829,7 +763,7 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| ParseError(); |
| token_->AddNewAttribute(); |
| token_->BeginAttributeName(source.NumberOfCharactersConsumed()); |
| - token_->AppendToAttributeName(cc); |
| + token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc)); |
| HTML_ADVANCE_TO(kAttributeNameState); |
| } |
| } |
| @@ -1154,11 +1088,8 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| END_STATE() |
| HTML_BEGIN_STATE(kBeforeDOCTYPENameState) { |
| - if (IsTokenizerWhitespace(cc)) |
| + if (IsTokenizerWhitespace(cc)) { |
| HTML_ADVANCE_TO(kBeforeDOCTYPENameState); |
| - else if (IsASCIIUpper(cc)) { |
| - token_->BeginDOCTYPE(ToLowerCase(cc)); |
| - HTML_ADVANCE_TO(kDOCTYPENameState); |
| } else if (cc == '>') { |
| ParseError(); |
| token_->BeginDOCTYPE(); |
| @@ -1170,26 +1101,23 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| - token_->BeginDOCTYPE(cc); |
| + token_->BeginDOCTYPE(ToLowerCaseIfAlpha(cc)); |
| HTML_ADVANCE_TO(kDOCTYPENameState); |
| } |
| } |
| END_STATE() |
| HTML_BEGIN_STATE(kDOCTYPENameState) { |
| - if (IsTokenizerWhitespace(cc)) |
| + if (IsTokenizerWhitespace(cc)) { |
| HTML_ADVANCE_TO(kAfterDOCTYPENameState); |
| - else if (cc == '>') |
| + } else if (cc == '>') { |
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
| - else if (IsASCIIUpper(cc)) { |
| - token_->AppendToName(ToLowerCase(cc)); |
| - HTML_ADVANCE_TO(kDOCTYPENameState); |
| } else if (cc == kEndOfFileMarker) { |
| ParseError(); |
| token_->SetForceQuirks(); |
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
| } else { |
| - token_->AppendToName(cc); |
| + token_->AppendToName(ToLowerCaseIfAlpha(cc)); |
| HTML_ADVANCE_TO(kDOCTYPENameState); |
| } |
| } |