Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(395)

Unified Diff: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

Issue 2827653003: HTMLTokenizer: Fold isASCIIUpper() / isASCIILower() cases (Closed)
Patch Set: Rebase (created 3 years, 8 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
index ec8889f24a22d84b24bd1f445162454ecb33fa88..c023053c915542fba85ad95bf09d42e22e291f9d 100644
--- a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
+++ b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
@@ -45,9 +45,12 @@ namespace blink {
using namespace HTMLNames;
static inline UChar ToLowerCase(UChar cc) {
- DCHECK(IsASCIIUpper(cc));
- const int kLowerCaseOffset = 0x20;
- return cc + kLowerCaseOffset;
+ DCHECK(IsASCIIAlpha(cc));
+ return cc | 0x20;
+}
+
+static inline UChar ToLowerCaseIfAlpha(UChar cc) {
+ return cc | (IsASCIIUpper(cc) ? 0x20 : 0);
}
static inline bool VectorEqualsString(const Vector<LChar, 32>& vector,
@@ -235,16 +238,13 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kTagOpenState) {
- if (cc == '!')
+ if (cc == '!') {
HTML_ADVANCE_TO(kMarkupDeclarationOpenState);
- else if (cc == '/')
+ } else if (cc == '/') {
HTML_ADVANCE_TO(kEndTagOpenState);
- else if (IsASCIIUpper(cc)) {
+ } else if (IsASCIIAlpha(cc)) {
token_->BeginStartTag(ToLowerCase(cc));
HTML_ADVANCE_TO(kTagNameState);
- } else if (IsASCIILower(cc)) {
- token_->BeginStartTag(cc);
- HTML_ADVANCE_TO(kTagNameState);
} else if (cc == '?') {
ParseError();
// The spec consumes the current character before switching
@@ -260,14 +260,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kEndTagOpenState) {
- if (IsASCIIUpper(cc)) {
+ if (IsASCIIAlpha(cc)) {
token_->BeginEndTag(static_cast<LChar>(ToLowerCase(cc)));
appropriate_end_tag_name_.clear();
HTML_ADVANCE_TO(kTagNameState);
- } else if (IsASCIILower(cc)) {
- token_->BeginEndTag(static_cast<LChar>(cc));
- appropriate_end_tag_name_.clear();
- HTML_ADVANCE_TO(kTagNameState);
} else if (cc == '>') {
ParseError();
HTML_ADVANCE_TO(kDataState);
@@ -284,20 +280,17 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kTagNameState) {
- if (IsTokenizerWhitespace(cc))
+ if (IsTokenizerWhitespace(cc)) {
HTML_ADVANCE_TO(kBeforeAttributeNameState);
- else if (cc == '/')
+ } else if (cc == '/') {
HTML_ADVANCE_TO(kSelfClosingStartTagState);
- else if (cc == '>')
+ } else if (cc == '>') {
return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
- else if (IsASCIIUpper(cc)) {
- token_->AppendToName(ToLowerCase(cc));
- HTML_ADVANCE_TO(kTagNameState);
} else if (cc == kEndOfFileMarker) {
ParseError();
HTML_RECONSUME_IN(kDataState);
} else {
- token_->AppendToName(cc);
+ token_->AppendToName(ToLowerCaseIfAlpha(cc));
HTML_ADVANCE_TO(kTagNameState);
}
}
@@ -316,14 +309,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kRCDATAEndTagOpenState) {
- if (IsASCIIUpper(cc)) {
+ if (IsASCIIAlpha(cc)) {
temporary_buffer_.push_back(static_cast<LChar>(cc));
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kRCDATAEndTagNameState);
- } else if (IsASCIILower(cc)) {
- temporary_buffer_.push_back(static_cast<LChar>(cc));
- AddToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(kRCDATAEndTagNameState);
} else {
BufferCharacter('<');
BufferCharacter('/');
@@ -333,14 +322,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kRCDATAEndTagNameState) {
- if (IsASCIIUpper(cc)) {
+ if (IsASCIIAlpha(cc)) {
temporary_buffer_.push_back(static_cast<LChar>(cc));
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kRCDATAEndTagNameState);
- } else if (IsASCIILower(cc)) {
- temporary_buffer_.push_back(static_cast<LChar>(cc));
- AddToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(kRCDATAEndTagNameState);
} else {
if (IsTokenizerWhitespace(cc)) {
if (IsAppropriateEndTag()) {
@@ -381,14 +366,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kRAWTEXTEndTagOpenState) {
- if (IsASCIIUpper(cc)) {
+ if (IsASCIIAlpha(cc)) {
temporary_buffer_.push_back(static_cast<LChar>(cc));
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
- } else if (IsASCIILower(cc)) {
- temporary_buffer_.push_back(static_cast<LChar>(cc));
- AddToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
} else {
BufferCharacter('<');
BufferCharacter('/');
@@ -398,14 +379,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kRAWTEXTEndTagNameState) {
- if (IsASCIIUpper(cc)) {
+ if (IsASCIIAlpha(cc)) {
temporary_buffer_.push_back(static_cast<LChar>(cc));
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
- } else if (IsASCIILower(cc)) {
- temporary_buffer_.push_back(static_cast<LChar>(cc));
- AddToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
} else {
if (IsTokenizerWhitespace(cc)) {
if (IsAppropriateEndTag()) {
@@ -450,14 +427,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kScriptDataEndTagOpenState) {
- if (IsASCIIUpper(cc)) {
+ if (IsASCIIAlpha(cc)) {
temporary_buffer_.push_back(static_cast<LChar>(cc));
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kScriptDataEndTagNameState);
- } else if (IsASCIILower(cc)) {
- temporary_buffer_.push_back(static_cast<LChar>(cc));
- AddToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(kScriptDataEndTagNameState);
} else {
BufferCharacter('<');
BufferCharacter('/');
@@ -467,14 +440,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kScriptDataEndTagNameState) {
- if (IsASCIIUpper(cc)) {
+ if (IsASCIIAlpha(cc)) {
temporary_buffer_.push_back(static_cast<LChar>(cc));
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kScriptDataEndTagNameState);
- } else if (IsASCIILower(cc)) {
- temporary_buffer_.push_back(static_cast<LChar>(cc));
- AddToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(kScriptDataEndTagNameState);
} else {
if (IsTokenizerWhitespace(cc)) {
if (IsAppropriateEndTag()) {
@@ -576,17 +545,11 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
temporary_buffer_.clear();
DCHECK(buffered_end_tag_name_.IsEmpty());
HTML_ADVANCE_TO(kScriptDataEscapedEndTagOpenState);
- } else if (IsASCIIUpper(cc)) {
+ } else if (IsASCIIAlpha(cc)) {
BufferCharacter('<');
BufferCharacter(cc);
temporary_buffer_.clear();
- temporary_buffer_.push_back(ToLowerCase(cc));
- HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
- } else if (IsASCIILower(cc)) {
- BufferCharacter('<');
- BufferCharacter(cc);
- temporary_buffer_.clear();
- temporary_buffer_.push_back(static_cast<LChar>(cc));
+ temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
} else {
BufferCharacter('<');
@@ -596,14 +559,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kScriptDataEscapedEndTagOpenState) {
- if (IsASCIIUpper(cc)) {
+ if (IsASCIIAlpha(cc)) {
temporary_buffer_.push_back(static_cast<LChar>(cc));
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
- } else if (IsASCIILower(cc)) {
- temporary_buffer_.push_back(static_cast<LChar>(cc));
- AddToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
} else {
BufferCharacter('<');
BufferCharacter('/');
@@ -613,14 +572,10 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kScriptDataEscapedEndTagNameState) {
- if (IsASCIIUpper(cc)) {
+ if (IsASCIIAlpha(cc)) {
temporary_buffer_.push_back(static_cast<LChar>(cc));
AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
- } else if (IsASCIILower(cc)) {
- temporary_buffer_.push_back(static_cast<LChar>(cc));
- AddToPossibleEndTag(static_cast<LChar>(cc));
- HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
} else {
if (IsTokenizerWhitespace(cc)) {
if (IsAppropriateEndTag()) {
@@ -655,13 +610,9 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
HTML_ADVANCE_TO(kScriptDataDoubleEscapedState);
else
HTML_ADVANCE_TO(kScriptDataEscapedState);
- } else if (IsASCIIUpper(cc)) {
+ } else if (IsASCIIAlpha(cc)) {
BufferCharacter(cc);
- temporary_buffer_.push_back(ToLowerCase(cc));
- HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
- } else if (IsASCIILower(cc)) {
- BufferCharacter(cc);
- temporary_buffer_.push_back(static_cast<LChar>(cc));
+ temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
} else
HTML_RECONSUME_IN(kScriptDataEscapedState);
@@ -739,13 +690,9 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
HTML_ADVANCE_TO(kScriptDataEscapedState);
else
HTML_ADVANCE_TO(kScriptDataDoubleEscapedState);
- } else if (IsASCIIUpper(cc)) {
+ } else if (IsASCIIAlpha(cc)) {
BufferCharacter(cc);
- temporary_buffer_.push_back(ToLowerCase(cc));
- HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState);
- } else if (IsASCIILower(cc)) {
- BufferCharacter(cc);
- temporary_buffer_.push_back(static_cast<LChar>(cc));
+ temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc)));
HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState);
} else
HTML_RECONSUME_IN(kScriptDataDoubleEscapedState);
@@ -753,17 +700,12 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kBeforeAttributeNameState) {
- if (IsTokenizerWhitespace(cc))
+ if (IsTokenizerWhitespace(cc)) {
HTML_ADVANCE_TO(kBeforeAttributeNameState);
- else if (cc == '/')
+ } else if (cc == '/') {
HTML_ADVANCE_TO(kSelfClosingStartTagState);
- else if (cc == '>')
+ } else if (cc == '>') {
return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
- else if (IsASCIIUpper(cc)) {
- token_->AddNewAttribute();
- token_->BeginAttributeName(source.NumberOfCharactersConsumed());
- token_->AppendToAttributeName(ToLowerCase(cc));
- HTML_ADVANCE_TO(kAttributeNameState);
} else if (cc == kEndOfFileMarker) {
ParseError();
HTML_RECONSUME_IN(kDataState);
@@ -772,7 +714,7 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
ParseError();
token_->AddNewAttribute();
token_->BeginAttributeName(source.NumberOfCharactersConsumed());
- token_->AppendToAttributeName(cc);
+ token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc));
HTML_ADVANCE_TO(kAttributeNameState);
}
}
@@ -791,9 +733,6 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
} else if (cc == '>') {
token_->EndAttributeName(source.NumberOfCharactersConsumed());
return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
- } else if (IsASCIIUpper(cc)) {
- token_->AppendToAttributeName(ToLowerCase(cc));
- HTML_ADVANCE_TO(kAttributeNameState);
} else if (cc == kEndOfFileMarker) {
ParseError();
token_->EndAttributeName(source.NumberOfCharactersConsumed());
@@ -801,26 +740,21 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
} else {
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
ParseError();
- token_->AppendToAttributeName(cc);
+ token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc));
HTML_ADVANCE_TO(kAttributeNameState);
}
}
END_STATE()
HTML_BEGIN_STATE(kAfterAttributeNameState) {
- if (IsTokenizerWhitespace(cc))
+ if (IsTokenizerWhitespace(cc)) {
HTML_ADVANCE_TO(kAfterAttributeNameState);
- else if (cc == '/')
+ } else if (cc == '/') {
HTML_ADVANCE_TO(kSelfClosingStartTagState);
- else if (cc == '=')
+ } else if (cc == '=') {
HTML_ADVANCE_TO(kBeforeAttributeValueState);
- else if (cc == '>')
+ } else if (cc == '>') {
return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
- else if (IsASCIIUpper(cc)) {
- token_->AddNewAttribute();
- token_->BeginAttributeName(source.NumberOfCharactersConsumed());
- token_->AppendToAttributeName(ToLowerCase(cc));
- HTML_ADVANCE_TO(kAttributeNameState);
} else if (cc == kEndOfFileMarker) {
ParseError();
HTML_RECONSUME_IN(kDataState);
@@ -829,7 +763,7 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
ParseError();
token_->AddNewAttribute();
token_->BeginAttributeName(source.NumberOfCharactersConsumed());
- token_->AppendToAttributeName(cc);
+ token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc));
HTML_ADVANCE_TO(kAttributeNameState);
}
}
@@ -1154,11 +1088,8 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(kBeforeDOCTYPENameState) {
- if (IsTokenizerWhitespace(cc))
+ if (IsTokenizerWhitespace(cc)) {
HTML_ADVANCE_TO(kBeforeDOCTYPENameState);
- else if (IsASCIIUpper(cc)) {
- token_->BeginDOCTYPE(ToLowerCase(cc));
- HTML_ADVANCE_TO(kDOCTYPENameState);
} else if (cc == '>') {
ParseError();
token_->BeginDOCTYPE();
@@ -1170,26 +1101,23 @@ bool HTMLTokenizer::NextToken(SegmentedString& source, HTMLToken& token) {
token_->SetForceQuirks();
return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState);
} else {
- token_->BeginDOCTYPE(cc);
+ token_->BeginDOCTYPE(ToLowerCaseIfAlpha(cc));
HTML_ADVANCE_TO(kDOCTYPENameState);
}
}
END_STATE()
HTML_BEGIN_STATE(kDOCTYPENameState) {
- if (IsTokenizerWhitespace(cc))
+ if (IsTokenizerWhitespace(cc)) {
HTML_ADVANCE_TO(kAfterDOCTYPENameState);
- else if (cc == '>')
+ } else if (cc == '>') {
return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
- else if (IsASCIIUpper(cc)) {
- token_->AppendToName(ToLowerCase(cc));
- HTML_ADVANCE_TO(kDOCTYPENameState);
} else if (cc == kEndOfFileMarker) {
ParseError();
token_->SetForceQuirks();
return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState);
} else {
- token_->AppendToName(cc);
+ token_->AppendToName(ToLowerCaseIfAlpha(cc));
HTML_ADVANCE_TO(kDOCTYPENameState);
}
}
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698