Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1203)

Unified Diff: Source/core/html/parser/HTMLMetaCharsetParser.cpp

Issue 133273007: Revert "Moved text decoding to the parser thread" (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « Source/core/html/parser/HTMLMetaCharsetParser.h ('k') | Source/core/html/parser/HTMLParserIdioms.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: Source/core/html/parser/HTMLMetaCharsetParser.cpp
diff --git a/Source/core/html/parser/HTMLMetaCharsetParser.cpp b/Source/core/html/parser/HTMLMetaCharsetParser.cpp
index 7947846a1690040a6e38f4847ec319995c9e0151..249e98bd9f3b07bbcdfacbd0fb69c2b094af12b4 100644
--- a/Source/core/html/parser/HTMLMetaCharsetParser.cpp
+++ b/Source/core/html/parser/HTMLMetaCharsetParser.cpp
@@ -51,12 +51,61 @@ HTMLMetaCharsetParser::~HTMLMetaCharsetParser()
{
}
+// Keyword that extractCharset() searches for inside an attribute value, and
+// its length in characters (sizeof minus the terminating NUL).
+static const char charsetString[] = "charset";
+static const size_t charsetLength = sizeof("charset") - 1;
+
+// Scans |value| for the pattern
+//     charset [whitespace] = [whitespace] [quote] <name> [matching quote]
+// and returns <name> for the first occurrence that parses successfully.
+// Returns "" when no occurrence is found, when nothing follows the '=' (or the
+// opening quote), or when an opening quote is never closed.
+String HTMLMetaCharsetParser::extractCharset(const String& value)
+{
+ size_t pos = 0;
+ unsigned length = value.length();
+
+ while (pos < length) {
+ // NOTE(review): the third argument (false) is presumably the
+ // case-sensitivity flag, making this a case-insensitive search - confirm
+ // against String::find.
+ pos = value.find(charsetString, pos, false);
+ if (pos == kNotFound)
+ break;
+
+ // Step past the keyword itself.
+ pos += charsetLength;
+
+ // Skip whitespace.
+ // (Any character with a code <= ' ' is treated as whitespace here.)
+ while (pos < length && value[pos] <= ' ')
+ ++pos;
+
+ // No '=' after this "charset": resume the search from the current position.
+ // NOTE(review): pos can equal length at this point; this presumably relies
+ // on String::operator[] tolerating an out-of-range index - confirm.
+ if (value[pos] != '=')
+ continue;
+
+ ++pos;
+
+ // Skip whitespace between '=' and the value.
+ while (pos < length && value[pos] <= ' ')
+ ++pos;
+
+ // Remember an opening single or double quote (0 means the value is unquoted).
+ char quoteMark = 0;
+ if (pos < length && (value[pos] == '"' || value[pos] == '\'')) {
+ quoteMark = static_cast<char>(value[pos++]);
+ ASSERT(!(quoteMark & 0x80));
+ }
+
+ // Input ended right after '=' or right after the opening quote.
+ if (pos == length)
+ break;
+
+ // Find the end of the value: a quoted value runs until the matching quote;
+ // an unquoted value runs until a character <= ' ', a quote, or a ';'.
+ unsigned end = pos;
+ while (end < length && ((quoteMark && value[end] != quoteMark) || (!quoteMark && value[end] > ' ' && value[end] != '"' && value[end] != '\'' && value[end] != ';')))
+ ++end;
+
+ if (quoteMark && (end == length))
+ break; // Close quote not found.
+
+ // The characters in [pos, end) are the charset name (may be empty for an
+ // unquoted value that is immediately terminated).
+ return value.substring(pos, end - pos);
+ }
+
+ return "";
+}
+
bool HTMLMetaCharsetParser::processMeta()
{
const HTMLToken::AttributeList& tokenAttributes = m_token.attributes();
- HTMLAttributeList attributes;
+ AttributeList attributes;
for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin(); iter != tokenAttributes.end(); ++iter) {
- String attributeName = attemptStaticStringCreation(iter->name, Likely8Bit);
+ String attributeName = StringImpl::create8BitIfPossible(iter->name);
String attributeValue = StringImpl::create8BitIfPossible(iter->value);
attributes.append(std::make_pair(attributeName, attributeValue));
}
@@ -65,6 +114,37 @@ bool HTMLMetaCharsetParser::processMeta()
return m_encoding.isValid();
}
+// Derives a text encoding from a list of (name, value) attribute pairs.
+//
+// A charset is taken either directly from the value of the charsetAttr
+// attribute (mode Charset) or extracted from the value of the contentAttr
+// attribute via extractCharset() (mode Pragma). A Pragma result is only
+// honoured when an http_equivAttr attribute whose value equals "content-type"
+// (ignoring case) is also present.
+//
+// Returns a TextEncoding built from the charset with leading and trailing HTML
+// spaces stripped, or a default-constructed TextEncoding when the conditions
+// above are not met.
+WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeList& attributes)
+{
+ bool gotPragma = false;
+ Mode mode = None;
+ String charset;
+
+ for (AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
+ // Each entry is a pair: first is the attribute name, second is its value.
+ const AtomicString& attributeName = AtomicString(iter->first);
+ const String& attributeValue = iter->second;
+
+ if (attributeName == http_equivAttr) {
+ if (equalIgnoringCase(attributeValue, "content-type"))
+ gotPragma = true;
+ } else if (charset.isEmpty()) {
+ // Only look for a charset while none has been found yet, so the first
+ // attribute that yields a non-empty charset wins.
+ if (attributeName == charsetAttr) {
+ charset = attributeValue;
+ mode = Charset;
+ } else if (attributeName == contentAttr) {
+ charset = extractCharset(attributeValue);
+ // Switch to Pragma mode only if extraction produced something.
+ if (charset.length())
+ mode = Pragma;
+ }
+ }
+ }
+
+ // A charset found via contentAttr counts only together with the
+ // "content-type" http_equivAttr seen above; a charsetAttr value always counts.
+ if (mode == Charset || (mode == Pragma && gotPragma))
+ return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));
+
+ return WTF::TextEncoding();
+}
+
static const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over.
bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
@@ -97,20 +177,20 @@ bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
while (m_tokenizer->nextToken(m_input, m_token)) {
bool end = m_token.type() == HTMLToken::EndTag;
if (end || m_token.type() == HTMLToken::StartTag) {
- String tagName = attemptStaticStringCreation(m_token.name(), Likely8Bit);
+ AtomicString tagName(m_token.name());
if (!end) {
m_tokenizer->updateStateFor(tagName);
- if (threadSafeMatch(tagName, metaTag) && processMeta()) {
+ if (tagName == metaTag && processMeta()) {
m_doneChecking = true;
return true;
}
}
- if (!threadSafeMatch(tagName, scriptTag) && !threadSafeMatch(tagName, noscriptTag)
- && !threadSafeMatch(tagName, styleTag) && !threadSafeMatch(tagName, linkTag)
- && !threadSafeMatch(tagName, metaTag) && !threadSafeMatch(tagName, objectTag)
- && !threadSafeMatch(tagName, titleTag) && !threadSafeMatch(tagName, baseTag)
- && (end || !threadSafeMatch(tagName, htmlTag)) && (end || !threadSafeMatch(tagName, headTag))) {
+ if (tagName != scriptTag && tagName != noscriptTag
+ && tagName != styleTag && tagName != linkTag
+ && tagName != metaTag && tagName != objectTag
+ && tagName != titleTag && tagName != baseTag
+ && (end || tagName != htmlTag) && (end || tagName != headTag)) {
m_inHeadSection = false;
}
}
« no previous file with comments | « Source/core/html/parser/HTMLMetaCharsetParser.h ('k') | Source/core/html/parser/HTMLParserIdioms.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698