| Index: Source/core/html/parser/HTMLMetaCharsetParser.cpp
|
| diff --git a/Source/core/html/parser/HTMLMetaCharsetParser.cpp b/Source/core/html/parser/HTMLMetaCharsetParser.cpp
|
| index 7947846a1690040a6e38f4847ec319995c9e0151..038c8a1e5fb7e896335332768fd9b0dea299b629 100644
|
| --- a/Source/core/html/parser/HTMLMetaCharsetParser.cpp
|
| +++ b/Source/core/html/parser/HTMLMetaCharsetParser.cpp
|
| @@ -51,12 +51,61 @@ HTMLMetaCharsetParser::~HTMLMetaCharsetParser()
|
| {
|
| }
|
|
|
| +static const char charsetString[] = "charset"; // Keyword that extractCharset() searches for.
|
| +static const size_t charsetLength = sizeof("charset") - 1; // Length of "charset" without the terminating NUL.
|
| +// Extracts a charset name from |value|: looks for "charset", optional whitespace, '=', optional whitespace, then an optionally quoted token. Returns "" if no such token is found.
|
| +String HTMLMetaCharsetParser::extractCharset(const String& value)
|
| +{
|
| + size_t pos = 0;
|
| + unsigned length = value.length();
|
| + // Each pass examines one occurrence of "charset"; an occurrence not followed by '=' is skipped and the search resumes after it.
|
| + while (pos < length) {
|
| + pos = value.find(charsetString, pos, false); // NOTE(review): the `false` argument presumably requests a case-insensitive search - confirm against String::find.
|
| + if (pos == kNotFound)
|
| + break;
|
| +
|
| + pos += charsetLength; // Step past the keyword.
|
| +
|
| + // Skip whitespace (any character <= ' ').
|
| + while (pos < length && value[pos] <= ' ')
|
| + ++pos;
|
| + // NOTE(review): pos may equal length here; the read below assumes String::operator[] is safe at index == length - confirm.
|
| + if (value[pos] != '=')
|
| + continue;
|
| +
|
| + ++pos; // Step past '='.
|
| + // Skip whitespace between '=' and the value.
|
| + while (pos < length && value[pos] <= ' ')
|
| + ++pos;
|
| + // Remember an opening ' or " so that the value must end at the matching quote.
|
| + char quoteMark = 0;
|
| + if (pos < length && (value[pos] == '"' || value[pos] == '\'')) {
|
| + quoteMark = static_cast<char>(value[pos++]);
|
| + ASSERT(!(quoteMark & 0x80)); // The quote is ' or ", so its high bit is clear.
|
| + }
|
| + // Nothing follows the '=' (or the opening quote): stop and return "".
|
| + if (pos == length)
|
| + break;
|
| + // A quoted value runs up to the matching quote; an unquoted one stops at a character <= ' ', at ' or ", or at ';'.
|
| + unsigned end = pos;
|
| + while (end < length && ((quoteMark && value[end] != quoteMark) || (!quoteMark && value[end] > ' ' && value[end] != '"' && value[end] != '\'' && value[end] != ';')))
|
| + ++end;
|
| +
|
| + if (quoteMark && (end == length))
|
| + break; // Close quote not found.
|
| +
|
| + return value.substring(pos, end - pos);
|
| + }
|
| + // No usable charset value was found.
|
| + return "";
|
| +}
|
| +
|
| bool HTMLMetaCharsetParser::processMeta()
|
| {
|
| const HTMLToken::AttributeList& tokenAttributes = m_token.attributes();
|
| - HTMLAttributeList attributes;
|
| + AttributeList attributes;
|
| for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin(); iter != tokenAttributes.end(); ++iter) {
|
| - String attributeName = attemptStaticStringCreation(iter->name, Likely8Bit);
|
| + String attributeName = StringImpl::create8BitIfPossible(iter->name);
|
| String attributeValue = StringImpl::create8BitIfPossible(iter->value);
|
| attributes.append(std::make_pair(attributeName, attributeValue));
|
| }
|
| @@ -65,6 +114,37 @@ bool HTMLMetaCharsetParser::processMeta()
|
| return m_encoding.isValid();
|
| }
|
|
|
| +WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeList& attributes) // Derives a text encoding from a list of (name, value) attribute pairs.
|
| +{
|
| + bool gotPragma = false;
|
| + Mode mode = None;
|
| + String charset;
|
| + // A charset attribute applies on its own; a charset taken from a content attribute applies only if an http-equiv attribute equals "content-type" (ignoring case).
|
| + for (AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
|
| + const AtomicString& attributeName = iter->first;
|
| + const String& attributeValue = iter->second;
|
| + // http-equiv is always examined; charset/content are consulted only while no non-empty charset has been recorded.
|
| + if (attributeName == http_equivAttr) {
|
| + if (equalIgnoringCase(attributeValue, "content-type"))
|
| + gotPragma = true;
|
| + } else if (charset.isEmpty()) {
|
| + if (attributeName == charsetAttr) {
|
| + charset = attributeValue;
|
| + mode = Charset;
|
| + } else if (attributeName == contentAttr) {
|
| + charset = extractCharset(attributeValue);
|
| + if (charset.length())
|
| + mode = Pragma;
|
| + }
|
| + }
|
| + }
|
| +
|
| + if (mode == Charset || (mode == Pragma && gotPragma))
|
| + return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset)); // Leading/trailing HTML spaces are stripped from the name first.
|
| +
|
| + return WTF::TextEncoding(); // No applicable declaration: return a default-constructed encoding.
|
| +}
|
| +
|
| static const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over.
|
|
|
| bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
|
| @@ -97,20 +177,20 @@ bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)
|
| while (m_tokenizer->nextToken(m_input, m_token)) {
|
| bool end = m_token.type() == HTMLToken::EndTag;
|
| if (end || m_token.type() == HTMLToken::StartTag) {
|
| - String tagName = attemptStaticStringCreation(m_token.name(), Likely8Bit);
|
| + AtomicString tagName(m_token.name());
|
| if (!end) {
|
| m_tokenizer->updateStateFor(tagName);
|
| - if (threadSafeMatch(tagName, metaTag) && processMeta()) {
|
| + if (tagName == metaTag && processMeta()) {
|
| m_doneChecking = true;
|
| return true;
|
| }
|
| }
|
|
|
| - if (!threadSafeMatch(tagName, scriptTag) && !threadSafeMatch(tagName, noscriptTag)
|
| - && !threadSafeMatch(tagName, styleTag) && !threadSafeMatch(tagName, linkTag)
|
| - && !threadSafeMatch(tagName, metaTag) && !threadSafeMatch(tagName, objectTag)
|
| - && !threadSafeMatch(tagName, titleTag) && !threadSafeMatch(tagName, baseTag)
|
| - && (end || !threadSafeMatch(tagName, htmlTag)) && (end || !threadSafeMatch(tagName, headTag))) {
|
| + if (tagName != scriptTag && tagName != noscriptTag
|
| + && tagName != styleTag && tagName != linkTag
|
| + && tagName != metaTag && tagName != objectTag
|
| + && tagName != titleTag && tagName != baseTag
|
| + && (end || tagName != htmlTag) && (end || tagName != headTag)) {
|
| m_inHeadSection = false;
|
| }
|
| }
|
|
|