Source/core/html/parser/HTMLMetaCharsetParser.cpp - Issue 133273007: Revert "Moved text decoding to the parser thread"

Unified Diff: Source/core/html/parser/HTMLMetaCharsetParser.cpp

Issue 133273007: Revert "Moved text decoding to the parser thread" (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 6 years, 11 months ago

Use n/p to move between diff chunks and N/P to move between comments. Draft comments are visible only to you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: Source/core/html/parser/HTMLMetaCharsetParser.cpp

diff --git a/Source/core/html/parser/HTMLMetaCharsetParser.cpp b/Source/core/html/parser/HTMLMetaCharsetParser.cpp

index 7947846a1690040a6e38f4847ec319995c9e0151..249e98bd9f3b07bbcdfacbd0fb69c2b094af12b4 100644

--- a/Source/core/html/parser/HTMLMetaCharsetParser.cpp

+++ b/Source/core/html/parser/HTMLMetaCharsetParser.cpp

@@ -51,12 +51,61 @@ HTMLMetaCharsetParser::~HTMLMetaCharsetParser()

{

}

+static const char charsetString[] = "charset";

+static const size_t charsetLength = sizeof("charset") - 1;

+String HTMLMetaCharsetParser::extractCharset(const String& value)

+ size_t pos = 0;

+ unsigned length = value.length();

+ while (pos < length) {

+ pos = value.find(charsetString, pos, false);

+ if (pos == kNotFound)

+ break;

+ pos += charsetLength;

+ // Skip whitespace.

+ while (pos < length && value[pos] <= ' ')

+ ++pos;

+ if (value[pos] != '=')

+ continue;

+ ++pos;

+ while (pos < length && value[pos] <= ' ')

+ ++pos;

+ char quoteMark = 0;

+ if (pos < length && (value[pos] == '"' || value[pos] == '\'')) {

+ quoteMark = static_cast<char>(value[pos++]);

+ ASSERT(!(quoteMark & 0x80));

+ }

+ if (pos == length)

+ break;

+ unsigned end = pos;

+ while (end < length && ((quoteMark && value[end] != quoteMark) || (!quoteMark && value[end] > ' ' && value[end] != '"' && value[end] != '\'' && value[end] != ';')))

+ ++end;

+ if (quoteMark && (end == length))

+ break; // Close quote not found.

+ return value.substring(pos, end - pos);

+ }

+ return "";

bool HTMLMetaCharsetParser::processMeta()

{

const HTMLToken::AttributeList& tokenAttributes = m_token.attributes();

- HTMLAttributeList attributes;

+ AttributeList attributes;

for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin(); iter != tokenAttributes.end(); ++iter) {

- String attributeName = attemptStaticStringCreation(iter->name, Likely8Bit);

+ String attributeName = StringImpl::create8BitIfPossible(iter->name);

String attributeValue = StringImpl::create8BitIfPossible(iter->value);

attributes.append(std::make_pair(attributeName, attributeValue));

}

@@ -65,6 +114,37 @@ bool HTMLMetaCharsetParser::processMeta()

return m_encoding.isValid();

}

+WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeList& attributes)

+ bool gotPragma = false;

+ Mode mode = None;

+ String charset;

+ for (AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {

+ const AtomicString& attributeName = AtomicString(iter->first);

+ const String& attributeValue = iter->second;

+ if (attributeName == http_equivAttr) {

+ if (equalIgnoringCase(attributeValue, "content-type"))

+ gotPragma = true;

+ } else if (charset.isEmpty()) {

+ if (attributeName == charsetAttr) {

+ charset = attributeValue;

+ mode = Charset;

+ } else if (attributeName == contentAttr) {

+ charset = extractCharset(attributeValue);

+ if (charset.length())

+ mode = Pragma;

+ }

+ if (mode == Charset || (mode == Pragma && gotPragma))

+ return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));

+ return WTF::TextEncoding();

static const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over.

bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)

@@ -97,20 +177,20 @@ bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)

while (m_tokenizer->nextToken(m_input, m_token)) {

bool end = m_token.type() == HTMLToken::EndTag;

if (end || m_token.type() == HTMLToken::StartTag) {

- String tagName = attemptStaticStringCreation(m_token.name(), Likely8Bit);

+ AtomicString tagName(m_token.name());

if (!end) {

m_tokenizer->updateStateFor(tagName);

- if (threadSafeMatch(tagName, metaTag) && processMeta()) {

+ if (tagName == metaTag && processMeta()) {

m_doneChecking = true;

return true;

}

- if (!threadSafeMatch(tagName, scriptTag) && !threadSafeMatch(tagName, noscriptTag)

- && !threadSafeMatch(tagName, styleTag) && !threadSafeMatch(tagName, linkTag)

- && !threadSafeMatch(tagName, metaTag) && !threadSafeMatch(tagName, objectTag)

- && !threadSafeMatch(tagName, titleTag) && !threadSafeMatch(tagName, baseTag)

- && (end || !threadSafeMatch(tagName, htmlTag)) && (end || !threadSafeMatch(tagName, headTag))) {

+ if (tagName != scriptTag && tagName != noscriptTag

+ && tagName != styleTag && tagName != linkTag

+ && tagName != metaTag && tagName != objectTag

+ && tagName != titleTag && tagName != baseTag

+ && (end || tagName != htmlTag) && (end || tagName != headTag)) {

m_inHeadSection = false;

}

« no previous file with comments | « Source/core/html/parser/HTMLMetaCharsetParser.h ('k') | Source/core/html/parser/HTMLParserIdioms.h » ('j') | no next file with comments »