Index: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp |
diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp |
index ec8889f24a22d84b24bd1f445162454ecb33fa88..c023053c915542fba85ad95bf09d42e22e291f9d 100644 |
--- a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp |
+++ b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp |
@@ -45,9 +45,12 @@ namespace blink { |
using namespace HTMLNames; |
static inline UChar ToLowerCase(UChar cc) { |
- DCHECK(IsASCIIUpper(cc)); |
- const int kLowerCaseOffset = 0x20; |
- return cc + kLowerCaseOffset; |
+ DCHECK(IsASCIIAlpha(cc)); /* Precondition relaxed by this patch: the debug check is now IsASCIIAlpha instead of IsASCIIUpper, so callers no longer need a separate lower-case branch. */ |
+ return cc | 0x20; /* OR instead of the old add: in ASCII, setting bit 0x20 turns 'A'-'Z' into 'a'-'z' and leaves 'a'-'z' unchanged, whereas adding 0x20 to an already-lower-case letter would not. */ |
+} |
+ |
+static inline UChar ToLowerCaseIfAlpha(UChar cc) { /* Unchecked counterpart of ToLowerCase: has no DCHECK, so it is used in the catch-all branches where cc may be any character. */ |
+ return cc | (IsASCIIUpper(cc) ? 0x20 : 0); /* ORs in 0x20 only when IsASCIIUpper(cc) holds; otherwise ORs in 0 and returns cc unchanged. */ |
} |
static inline bool VectorEqualsString(const Vector<LChar, 32>& vector, |
@@ -235,16 +238,13 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kTagOpenState) { |
- if (cc == '!') |
+ if (cc == '!') { |
HTML_ADVANCE_TO(kMarkupDeclarationOpenState); |
- else if (cc == '/') |
+ } else if (cc == '/') { |
HTML_ADVANCE_TO(kEndTagOpenState); |
- else if (IsASCIIUpper(cc)) { |
+ } else if (IsASCIIAlpha(cc)) { |
token_->BeginStartTag(ToLowerCase(cc)); |
HTML_ADVANCE_TO(kTagNameState); |
- } else if (IsASCIILower(cc)) { |
- token_->BeginStartTag(cc); |
- HTML_ADVANCE_TO(kTagNameState); |
} else if (cc == '?') { |
ParseError(); |
// The spec consumes the current character before switching |
@@ -260,14 +260,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kEndTagOpenState) { |
- if (IsASCIIUpper(cc)) { |
+ if (IsASCIIAlpha(cc)) { |
token_->BeginEndTag(static_cast<LChar>(ToLowerCase(cc))); |
appropriate_end_tag_name_.clear(); |
HTML_ADVANCE_TO(kTagNameState); |
- } else if (IsASCIILower(cc)) { |
- token_->BeginEndTag(static_cast<LChar>(cc)); |
- appropriate_end_tag_name_.clear(); |
- HTML_ADVANCE_TO(kTagNameState); |
} else if (cc == '>') { |
ParseError(); |
HTML_ADVANCE_TO(kDataState); |
@@ -284,20 +280,17 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kTagNameState) { |
- if (IsTokenizerWhitespace(cc)) |
+ if (IsTokenizerWhitespace(cc)) { |
HTML_ADVANCE_TO(kBeforeAttributeNameState); |
- else if (cc == '/') |
+ } else if (cc == '/') { |
HTML_ADVANCE_TO(kSelfClosingStartTagState); |
- else if (cc == '>') |
+ } else if (cc == '>') { |
return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
- else if (IsASCIIUpper(cc)) { |
- token_->AppendToName(ToLowerCase(cc)); |
- HTML_ADVANCE_TO(kTagNameState); |
} else if (cc == kEndOfFileMarker) { |
ParseError(); |
HTML_RECONSUME_IN(kDataState); |
} else { |
- token_->AppendToName(cc); |
+ token_->AppendToName(ToLowerCaseIfAlpha(cc)); |
HTML_ADVANCE_TO(kTagNameState); |
} |
} |
@@ -316,14 +309,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kRCDATAEndTagOpenState) { |
- if (IsASCIIUpper(cc)) { |
+ if (IsASCIIAlpha(cc)) { |
temporary_buffer_.push_back(static_cast<LChar>(cc)); |
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kRCDATAEndTagNameState); |
- } else if (IsASCIILower(cc)) { |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
- AddToPossibleEndTag(static_cast<LChar>(cc)); |
- HTML_ADVANCE_TO(kRCDATAEndTagNameState); |
} else { |
BufferCharacter('<'); |
BufferCharacter('/'); |
@@ -333,14 +322,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kRCDATAEndTagNameState) { |
- if (IsASCIIUpper(cc)) { |
+ if (IsASCIIAlpha(cc)) { |
temporary_buffer_.push_back(static_cast<LChar>(cc)); |
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kRCDATAEndTagNameState); |
- } else if (IsASCIILower(cc)) { |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
- AddToPossibleEndTag(static_cast<LChar>(cc)); |
- HTML_ADVANCE_TO(kRCDATAEndTagNameState); |
} else { |
if (IsTokenizerWhitespace(cc)) { |
if (IsAppropriateEndTag()) { |
@@ -381,14 +366,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kRAWTEXTEndTagOpenState) { |
- if (IsASCIIUpper(cc)) { |
+ if (IsASCIIAlpha(cc)) { |
temporary_buffer_.push_back(static_cast<LChar>(cc)); |
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kRAWTEXTEndTagNameState); |
- } else if (IsASCIILower(cc)) { |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
- AddToPossibleEndTag(static_cast<LChar>(cc)); |
- HTML_ADVANCE_TO(kRAWTEXTEndTagNameState); |
} else { |
BufferCharacter('<'); |
BufferCharacter('/'); |
@@ -398,14 +379,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kRAWTEXTEndTagNameState) { |
- if (IsASCIIUpper(cc)) { |
+ if (IsASCIIAlpha(cc)) { |
temporary_buffer_.push_back(static_cast<LChar>(cc)); |
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kRAWTEXTEndTagNameState); |
- } else if (IsASCIILower(cc)) { |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
- AddToPossibleEndTag(static_cast<LChar>(cc)); |
- HTML_ADVANCE_TO(kRAWTEXTEndTagNameState); |
} else { |
if (IsTokenizerWhitespace(cc)) { |
if (IsAppropriateEndTag()) { |
@@ -450,14 +427,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kScriptDataEndTagOpenState) { |
- if (IsASCIIUpper(cc)) { |
+ if (IsASCIIAlpha(cc)) { |
temporary_buffer_.push_back(static_cast<LChar>(cc)); |
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kScriptDataEndTagNameState); |
- } else if (IsASCIILower(cc)) { |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
- AddToPossibleEndTag(static_cast<LChar>(cc)); |
- HTML_ADVANCE_TO(kScriptDataEndTagNameState); |
} else { |
BufferCharacter('<'); |
BufferCharacter('/'); |
@@ -467,14 +440,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kScriptDataEndTagNameState) { |
- if (IsASCIIUpper(cc)) { |
+ if (IsASCIIAlpha(cc)) { |
temporary_buffer_.push_back(static_cast<LChar>(cc)); |
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kScriptDataEndTagNameState); |
- } else if (IsASCIILower(cc)) { |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
- AddToPossibleEndTag(static_cast<LChar>(cc)); |
- HTML_ADVANCE_TO(kScriptDataEndTagNameState); |
} else { |
if (IsTokenizerWhitespace(cc)) { |
if (IsAppropriateEndTag()) { |
@@ -576,17 +545,11 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
temporary_buffer_.clear(); |
DCHECK(buffered_end_tag_name_.IsEmpty()); |
HTML_ADVANCE_TO(kScriptDataEscapedEndTagOpenState); |
- } else if (IsASCIIUpper(cc)) { |
+ } else if (IsASCIIAlpha(cc)) { |
BufferCharacter('<'); |
BufferCharacter(cc); |
temporary_buffer_.clear(); |
- temporary_buffer_.push_back(ToLowerCase(cc)); |
- HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState); |
- } else if (IsASCIILower(cc)) { |
- BufferCharacter('<'); |
- BufferCharacter(cc); |
- temporary_buffer_.clear(); |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
+ temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState); |
} else { |
BufferCharacter('<'); |
@@ -596,14 +559,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kScriptDataEscapedEndTagOpenState) { |
- if (IsASCIIUpper(cc)) { |
+ if (IsASCIIAlpha(cc)) { |
temporary_buffer_.push_back(static_cast<LChar>(cc)); |
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState); |
- } else if (IsASCIILower(cc)) { |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
- AddToPossibleEndTag(static_cast<LChar>(cc)); |
- HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState); |
} else { |
BufferCharacter('<'); |
BufferCharacter('/'); |
@@ -613,14 +572,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kScriptDataEscapedEndTagNameState) { |
- if (IsASCIIUpper(cc)) { |
+ if (IsASCIIAlpha(cc)) { |
temporary_buffer_.push_back(static_cast<LChar>(cc)); |
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState); |
- } else if (IsASCIILower(cc)) { |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
- AddToPossibleEndTag(static_cast<LChar>(cc)); |
- HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState); |
} else { |
if (IsTokenizerWhitespace(cc)) { |
if (IsAppropriateEndTag()) { |
@@ -655,13 +610,9 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
HTML_ADVANCE_TO(kScriptDataDoubleEscapedState); |
else |
HTML_ADVANCE_TO(kScriptDataEscapedState); |
- } else if (IsASCIIUpper(cc)) { |
+ } else if (IsASCIIAlpha(cc)) { |
BufferCharacter(cc); |
- temporary_buffer_.push_back(ToLowerCase(cc)); |
- HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState); |
- } else if (IsASCIILower(cc)) { |
- BufferCharacter(cc); |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
+ temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState); |
} else |
HTML_RECONSUME_IN(kScriptDataEscapedState); |
@@ -739,13 +690,9 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
HTML_ADVANCE_TO(kScriptDataEscapedState); |
else |
HTML_ADVANCE_TO(kScriptDataDoubleEscapedState); |
- } else if (IsASCIIUpper(cc)) { |
+ } else if (IsASCIIAlpha(cc)) { |
BufferCharacter(cc); |
- temporary_buffer_.push_back(ToLowerCase(cc)); |
- HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState); |
- } else if (IsASCIILower(cc)) { |
- BufferCharacter(cc); |
- temporary_buffer_.push_back(static_cast<LChar>(cc)); |
+ temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc))); |
HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState); |
} else |
HTML_RECONSUME_IN(kScriptDataDoubleEscapedState); |
@@ -753,17 +700,12 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kBeforeAttributeNameState) { |
- if (IsTokenizerWhitespace(cc)) |
+ if (IsTokenizerWhitespace(cc)) { |
HTML_ADVANCE_TO(kBeforeAttributeNameState); |
- else if (cc == '/') |
+ } else if (cc == '/') { |
HTML_ADVANCE_TO(kSelfClosingStartTagState); |
- else if (cc == '>') |
+ } else if (cc == '>') { |
return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
- else if (IsASCIIUpper(cc)) { |
- token_->AddNewAttribute(); |
- token_->BeginAttributeName(source.NumberOfCharactersConsumed()); |
- token_->AppendToAttributeName(ToLowerCase(cc)); |
- HTML_ADVANCE_TO(kAttributeNameState); |
} else if (cc == kEndOfFileMarker) { |
ParseError(); |
HTML_RECONSUME_IN(kDataState); |
@@ -772,7 +714,7 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
ParseError(); |
token_->AddNewAttribute(); |
token_->BeginAttributeName(source.NumberOfCharactersConsumed()); |
- token_->AppendToAttributeName(cc); |
+ token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc)); |
HTML_ADVANCE_TO(kAttributeNameState); |
} |
} |
@@ -791,9 +733,6 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
} else if (cc == '>') { |
token_->EndAttributeName(source.NumberOfCharactersConsumed()); |
return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
- } else if (IsASCIIUpper(cc)) { |
- token_->AppendToAttributeName(ToLowerCase(cc)); |
- HTML_ADVANCE_TO(kAttributeNameState); |
} else if (cc == kEndOfFileMarker) { |
ParseError(); |
token_->EndAttributeName(source.NumberOfCharactersConsumed()); |
@@ -801,26 +740,21 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
} else { |
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') |
ParseError(); |
- token_->AppendToAttributeName(cc); |
+ token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc)); |
HTML_ADVANCE_TO(kAttributeNameState); |
} |
} |
END_STATE() |
HTML_BEGIN_STATE(kAfterAttributeNameState) { |
- if (IsTokenizerWhitespace(cc)) |
+ if (IsTokenizerWhitespace(cc)) { |
HTML_ADVANCE_TO(kAfterAttributeNameState); |
- else if (cc == '/') |
+ } else if (cc == '/') { |
HTML_ADVANCE_TO(kSelfClosingStartTagState); |
- else if (cc == '=') |
+ } else if (cc == '=') { |
HTML_ADVANCE_TO(kBeforeAttributeValueState); |
- else if (cc == '>') |
+ } else if (cc == '>') { |
return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
- else if (IsASCIIUpper(cc)) { |
- token_->AddNewAttribute(); |
- token_->BeginAttributeName(source.NumberOfCharactersConsumed()); |
- token_->AppendToAttributeName(ToLowerCase(cc)); |
- HTML_ADVANCE_TO(kAttributeNameState); |
} else if (cc == kEndOfFileMarker) { |
ParseError(); |
HTML_RECONSUME_IN(kDataState); |
@@ -829,7 +763,7 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
ParseError(); |
token_->AddNewAttribute(); |
token_->BeginAttributeName(source.NumberOfCharactersConsumed()); |
- token_->AppendToAttributeName(cc); |
+ token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc)); |
HTML_ADVANCE_TO(kAttributeNameState); |
} |
} |
@@ -1154,11 +1088,8 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
END_STATE() |
HTML_BEGIN_STATE(kBeforeDOCTYPENameState) { |
- if (IsTokenizerWhitespace(cc)) |
+ if (IsTokenizerWhitespace(cc)) { |
HTML_ADVANCE_TO(kBeforeDOCTYPENameState); |
- else if (IsASCIIUpper(cc)) { |
- token_->BeginDOCTYPE(ToLowerCase(cc)); |
- HTML_ADVANCE_TO(kDOCTYPENameState); |
} else if (cc == '>') { |
ParseError(); |
token_->BeginDOCTYPE(); |
@@ -1170,26 +1101,23 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) { |
token_->SetForceQuirks(); |
return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
} else { |
- token_->BeginDOCTYPE(cc); |
+ token_->BeginDOCTYPE(ToLowerCaseIfAlpha(cc)); |
HTML_ADVANCE_TO(kDOCTYPENameState); |
} |
} |
END_STATE() |
HTML_BEGIN_STATE(kDOCTYPENameState) { |
- if (IsTokenizerWhitespace(cc)) |
+ if (IsTokenizerWhitespace(cc)) { |
HTML_ADVANCE_TO(kAfterDOCTYPENameState); |
- else if (cc == '>') |
+ } else if (cc == '>') { |
return EmitAndResumeIn(source, HTMLTokenizer::kDataState); |
- else if (IsASCIIUpper(cc)) { |
- token_->AppendToName(ToLowerCase(cc)); |
- HTML_ADVANCE_TO(kDOCTYPENameState); |
} else if (cc == kEndOfFileMarker) { |
ParseError(); |
token_->SetForceQuirks(); |
return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); |
} else { |
- token_->AppendToName(cc); |
+ token_->AppendToName(ToLowerCaseIfAlpha(cc)); |
HTML_ADVANCE_TO(kDOCTYPENameState); |
} |
} |