| Index: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
|
| diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
|
| index ec8889f24a22d84b24bd1f445162454ecb33fa88..c023053c915542fba85ad95bf09d42e22e291f9d 100644
|
| --- a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
|
| +++ b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
|
| @@ -45,9 +45,12 @@ namespace blink {
|
| using namespace HTMLNames;
|
|
|
| static inline UChar ToLowerCase(UChar cc) {
|
| - DCHECK(IsASCIIUpper(cc));
|
| - const int kLowerCaseOffset = 0x20;
|
| - return cc + kLowerCaseOffset;
|
| + DCHECK(IsASCIIAlpha(cc));  // Debug-only check: cc must be an ASCII letter of either case.
|
| + return cc | 0x20;  // Bit 0x20 separates 'A'-'Z' from 'a'-'z'; setting it leaves lowercase input unchanged.
|
| +}
|
| +
|
| +static inline UChar ToLowerCaseIfAlpha(UChar cc) {
|
| + return IsASCIIUpper(cc) ? (cc | 0x20) : cc;  // Fold only 'A'-'Z'; any other code unit is returned as-is.
|
| }
|
|
|
| static inline bool VectorEqualsString(const Vector<LChar, 32>& vector,
|
| @@ -235,16 +238,13 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kTagOpenState) {
|
| - if (cc == '!')
|
| + if (cc == '!') {
|
| HTML_ADVANCE_TO(kMarkupDeclarationOpenState);
|
| - else if (cc == '/')
|
| + } else if (cc == '/') {
|
| HTML_ADVANCE_TO(kEndTagOpenState);
|
| - else if (IsASCIIUpper(cc)) {
|
| + } else if (IsASCIIAlpha(cc)) {
|
| token_->BeginStartTag(ToLowerCase(cc));
|
| HTML_ADVANCE_TO(kTagNameState);
|
| - } else if (IsASCIILower(cc)) {
|
| - token_->BeginStartTag(cc);
|
| - HTML_ADVANCE_TO(kTagNameState);
|
| } else if (cc == '?') {
|
| ParseError();
|
| // The spec consumes the current character before switching
|
| @@ -260,14 +260,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kEndTagOpenState) {
|
| - if (IsASCIIUpper(cc)) {
|
| + if (IsASCIIAlpha(cc)) {
|
| token_->BeginEndTag(static_cast<LChar>(ToLowerCase(cc)));
|
| appropriate_end_tag_name_.clear();
|
| HTML_ADVANCE_TO(kTagNameState);
|
| - } else if (IsASCIILower(cc)) {
|
| - token_->BeginEndTag(static_cast<LChar>(cc));
|
| - appropriate_end_tag_name_.clear();
|
| - HTML_ADVANCE_TO(kTagNameState);
|
| } else if (cc == '>') {
|
| ParseError();
|
| HTML_ADVANCE_TO(kDataState);
|
| @@ -284,20 +280,17 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kTagNameState) {
|
| - if (IsTokenizerWhitespace(cc))
|
| + if (IsTokenizerWhitespace(cc)) {
|
| HTML_ADVANCE_TO(kBeforeAttributeNameState);
|
| - else if (cc == '/')
|
| + } else if (cc == '/') {
|
| HTML_ADVANCE_TO(kSelfClosingStartTagState);
|
| - else if (cc == '>')
|
| + } else if (cc == '>') {
|
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
|
| - else if (IsASCIIUpper(cc)) {
|
| - token_->AppendToName(ToLowerCase(cc));
|
| - HTML_ADVANCE_TO(kTagNameState);
|
| } else if (cc == kEndOfFileMarker) {
|
| ParseError();
|
| HTML_RECONSUME_IN(kDataState);
|
| } else {
|
| - token_->AppendToName(cc);
|
| + token_->AppendToName(ToLowerCaseIfAlpha(cc));
|
| HTML_ADVANCE_TO(kTagNameState);
|
| }
|
| }
|
| @@ -316,14 +309,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kRCDATAEndTagOpenState) {
|
| - if (IsASCIIUpper(cc)) {
|
| + if (IsASCIIAlpha(cc)) {
|
| temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kRCDATAEndTagNameState);
|
| - } else if (IsASCIILower(cc)) {
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| - AddToPossibleEndTag(static_cast<LChar>(cc));
|
| - HTML_ADVANCE_TO(kRCDATAEndTagNameState);
|
| } else {
|
| BufferCharacter('<');
|
| BufferCharacter('/');
|
| @@ -333,14 +322,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kRCDATAEndTagNameState) {
|
| - if (IsASCIIUpper(cc)) {
|
| + if (IsASCIIAlpha(cc)) {
|
| temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kRCDATAEndTagNameState);
|
| - } else if (IsASCIILower(cc)) {
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| - AddToPossibleEndTag(static_cast<LChar>(cc));
|
| - HTML_ADVANCE_TO(kRCDATAEndTagNameState);
|
| } else {
|
| if (IsTokenizerWhitespace(cc)) {
|
| if (IsAppropriateEndTag()) {
|
| @@ -381,14 +366,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kRAWTEXTEndTagOpenState) {
|
| - if (IsASCIIUpper(cc)) {
|
| + if (IsASCIIAlpha(cc)) {
|
| temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
|
| - } else if (IsASCIILower(cc)) {
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| - AddToPossibleEndTag(static_cast<LChar>(cc));
|
| - HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
|
| } else {
|
| BufferCharacter('<');
|
| BufferCharacter('/');
|
| @@ -398,14 +379,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kRAWTEXTEndTagNameState) {
|
| - if (IsASCIIUpper(cc)) {
|
| + if (IsASCIIAlpha(cc)) {
|
| temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
|
| - } else if (IsASCIILower(cc)) {
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| - AddToPossibleEndTag(static_cast<LChar>(cc));
|
| - HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
|
| } else {
|
| if (IsTokenizerWhitespace(cc)) {
|
| if (IsAppropriateEndTag()) {
|
| @@ -450,14 +427,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kScriptDataEndTagOpenState) {
|
| - if (IsASCIIUpper(cc)) {
|
| + if (IsASCIIAlpha(cc)) {
|
| temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kScriptDataEndTagNameState);
|
| - } else if (IsASCIILower(cc)) {
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| - AddToPossibleEndTag(static_cast<LChar>(cc));
|
| - HTML_ADVANCE_TO(kScriptDataEndTagNameState);
|
| } else {
|
| BufferCharacter('<');
|
| BufferCharacter('/');
|
| @@ -467,14 +440,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kScriptDataEndTagNameState) {
|
| - if (IsASCIIUpper(cc)) {
|
| + if (IsASCIIAlpha(cc)) {
|
| temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kScriptDataEndTagNameState);
|
| - } else if (IsASCIILower(cc)) {
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| - AddToPossibleEndTag(static_cast<LChar>(cc));
|
| - HTML_ADVANCE_TO(kScriptDataEndTagNameState);
|
| } else {
|
| if (IsTokenizerWhitespace(cc)) {
|
| if (IsAppropriateEndTag()) {
|
| @@ -576,17 +545,11 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| temporary_buffer_.clear();
|
| DCHECK(buffered_end_tag_name_.IsEmpty());
|
| HTML_ADVANCE_TO(kScriptDataEscapedEndTagOpenState);
|
| - } else if (IsASCIIUpper(cc)) {
|
| + } else if (IsASCIIAlpha(cc)) {
|
| BufferCharacter('<');
|
| BufferCharacter(cc);
|
| temporary_buffer_.clear();
|
| - temporary_buffer_.push_back(ToLowerCase(cc));
|
| - HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
|
| - } else if (IsASCIILower(cc)) {
|
| - BufferCharacter('<');
|
| - BufferCharacter(cc);
|
| - temporary_buffer_.clear();
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| + temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
|
| } else {
|
| BufferCharacter('<');
|
| @@ -596,14 +559,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kScriptDataEscapedEndTagOpenState) {
|
| - if (IsASCIIUpper(cc)) {
|
| + if (IsASCIIAlpha(cc)) {
|
| temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
|
| - } else if (IsASCIILower(cc)) {
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| - AddToPossibleEndTag(static_cast<LChar>(cc));
|
| - HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
|
| } else {
|
| BufferCharacter('<');
|
| BufferCharacter('/');
|
| @@ -613,14 +572,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kScriptDataEscapedEndTagNameState) {
|
| - if (IsASCIIUpper(cc)) {
|
| + if (IsASCIIAlpha(cc)) {
|
| temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
|
| - } else if (IsASCIILower(cc)) {
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| - AddToPossibleEndTag(static_cast<LChar>(cc));
|
| - HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
|
| } else {
|
| if (IsTokenizerWhitespace(cc)) {
|
| if (IsAppropriateEndTag()) {
|
| @@ -655,13 +610,9 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| HTML_ADVANCE_TO(kScriptDataDoubleEscapedState);
|
| else
|
| HTML_ADVANCE_TO(kScriptDataEscapedState);
|
| - } else if (IsASCIIUpper(cc)) {
|
| + } else if (IsASCIIAlpha(cc)) {
|
| BufferCharacter(cc);
|
| - temporary_buffer_.push_back(ToLowerCase(cc));
|
| - HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
|
| - } else if (IsASCIILower(cc)) {
|
| - BufferCharacter(cc);
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| + temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
|
| } else
|
| HTML_RECONSUME_IN(kScriptDataEscapedState);
|
| @@ -739,13 +690,9 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| HTML_ADVANCE_TO(kScriptDataEscapedState);
|
| else
|
| HTML_ADVANCE_TO(kScriptDataDoubleEscapedState);
|
| - } else if (IsASCIIUpper(cc)) {
|
| + } else if (IsASCIIAlpha(cc)) {
|
| BufferCharacter(cc);
|
| - temporary_buffer_.push_back(ToLowerCase(cc));
|
| - HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState);
|
| - } else if (IsASCIILower(cc)) {
|
| - BufferCharacter(cc);
|
| - temporary_buffer_.push_back(static_cast<LChar>(cc));
|
| + temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc)));
|
| HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState);
|
| } else
|
| HTML_RECONSUME_IN(kScriptDataDoubleEscapedState);
|
| @@ -753,17 +700,12 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kBeforeAttributeNameState) {
|
| - if (IsTokenizerWhitespace(cc))
|
| + if (IsTokenizerWhitespace(cc)) {
|
| HTML_ADVANCE_TO(kBeforeAttributeNameState);
|
| - else if (cc == '/')
|
| + } else if (cc == '/') {
|
| HTML_ADVANCE_TO(kSelfClosingStartTagState);
|
| - else if (cc == '>')
|
| + } else if (cc == '>') {
|
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
|
| - else if (IsASCIIUpper(cc)) {
|
| - token_->AddNewAttribute();
|
| - token_->BeginAttributeName(source.NumberOfCharactersConsumed());
|
| - token_->AppendToAttributeName(ToLowerCase(cc));
|
| - HTML_ADVANCE_TO(kAttributeNameState);
|
| } else if (cc == kEndOfFileMarker) {
|
| ParseError();
|
| HTML_RECONSUME_IN(kDataState);
|
| @@ -772,7 +714,7 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| ParseError();
|
| token_->AddNewAttribute();
|
| token_->BeginAttributeName(source.NumberOfCharactersConsumed());
|
| - token_->AppendToAttributeName(cc);
|
| + token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc));
|
| HTML_ADVANCE_TO(kAttributeNameState);
|
| }
|
| }
|
| @@ -791,9 +733,6 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| } else if (cc == '>') {
|
| token_->EndAttributeName(source.NumberOfCharactersConsumed());
|
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
|
| - } else if (IsASCIIUpper(cc)) {
|
| - token_->AppendToAttributeName(ToLowerCase(cc));
|
| - HTML_ADVANCE_TO(kAttributeNameState);
|
| } else if (cc == kEndOfFileMarker) {
|
| ParseError();
|
| token_->EndAttributeName(source.NumberOfCharactersConsumed());
|
| @@ -801,26 +740,21 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| } else {
|
| if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
|
| ParseError();
|
| - token_->AppendToAttributeName(cc);
|
| + token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc));
|
| HTML_ADVANCE_TO(kAttributeNameState);
|
| }
|
| }
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kAfterAttributeNameState) {
|
| - if (IsTokenizerWhitespace(cc))
|
| + if (IsTokenizerWhitespace(cc)) {
|
| HTML_ADVANCE_TO(kAfterAttributeNameState);
|
| - else if (cc == '/')
|
| + } else if (cc == '/') {
|
| HTML_ADVANCE_TO(kSelfClosingStartTagState);
|
| - else if (cc == '=')
|
| + } else if (cc == '=') {
|
| HTML_ADVANCE_TO(kBeforeAttributeValueState);
|
| - else if (cc == '>')
|
| + } else if (cc == '>') {
|
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
|
| - else if (IsASCIIUpper(cc)) {
|
| - token_->AddNewAttribute();
|
| - token_->BeginAttributeName(source.NumberOfCharactersConsumed());
|
| - token_->AppendToAttributeName(ToLowerCase(cc));
|
| - HTML_ADVANCE_TO(kAttributeNameState);
|
| } else if (cc == kEndOfFileMarker) {
|
| ParseError();
|
| HTML_RECONSUME_IN(kDataState);
|
| @@ -829,7 +763,7 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| ParseError();
|
| token_->AddNewAttribute();
|
| token_->BeginAttributeName(source.NumberOfCharactersConsumed());
|
| - token_->AppendToAttributeName(cc);
|
| + token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc));
|
| HTML_ADVANCE_TO(kAttributeNameState);
|
| }
|
| }
|
| @@ -1154,11 +1088,8 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kBeforeDOCTYPENameState) {
|
| - if (IsTokenizerWhitespace(cc))
|
| + if (IsTokenizerWhitespace(cc)) {
|
| HTML_ADVANCE_TO(kBeforeDOCTYPENameState);
|
| - else if (IsASCIIUpper(cc)) {
|
| - token_->BeginDOCTYPE(ToLowerCase(cc));
|
| - HTML_ADVANCE_TO(kDOCTYPENameState);
|
| } else if (cc == '>') {
|
| ParseError();
|
| token_->BeginDOCTYPE();
|
| @@ -1170,26 +1101,23 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
|
| token_->SetForceQuirks();
|
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState);
|
| } else {
|
| - token_->BeginDOCTYPE(cc);
|
| + token_->BeginDOCTYPE(ToLowerCaseIfAlpha(cc));
|
| HTML_ADVANCE_TO(kDOCTYPENameState);
|
| }
|
| }
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(kDOCTYPENameState) {
|
| - if (IsTokenizerWhitespace(cc))
|
| + if (IsTokenizerWhitespace(cc)) {
|
| HTML_ADVANCE_TO(kAfterDOCTYPENameState);
|
| - else if (cc == '>')
|
| + } else if (cc == '>') {
|
| return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
|
| - else if (IsASCIIUpper(cc)) {
|
| - token_->AppendToName(ToLowerCase(cc));
|
| - HTML_ADVANCE_TO(kDOCTYPENameState);
|
| } else if (cc == kEndOfFileMarker) {
|
| ParseError();
|
| token_->SetForceQuirks();
|
| return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState);
|
| } else {
|
| - token_->AppendToName(cc);
|
| + token_->AppendToName(ToLowerCaseIfAlpha(cc));
|
| HTML_ADVANCE_TO(kDOCTYPENameState);
|
| }
|
| }
|
|
|