Source/core/html/parser/HTMLMetaCharsetParser.cpp - Issue 133273007: Revert "Moved text decoding to the parser thread"

Side by Side Diff: Source/core/html/parser/HTMLMetaCharsetParser.cpp

Issue 133273007: Revert "Moved text decoding to the parser thread" (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2010 Google Inc. All Rights Reserved.	2 * Copyright (C) 2010 Google Inc. All Rights Reserved.

3 *	3 *

4 * Redistribution and use in source and binary forms, with or without	4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions	5 * modification, are permitted provided that the following conditions

6 * are met:	6 * are met:

7 * 1. Redistributions of source code must retain the above copyright	7 * 1. Redistributions of source code must retain the above copyright

8 * notice, this list of conditions and the following disclaimer.	8 * notice, this list of conditions and the following disclaimer.

9 * 2. Redistributions in binary form must reproduce the above copyright	9 * 2. Redistributions in binary form must reproduce the above copyright

10 * notice, this list of conditions and the following disclaimer in the	10 * notice, this list of conditions and the following disclaimer in the

(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
44 , m_assumedCodec(newTextCodec(Latin1Encoding()))	44 , m_assumedCodec(newTextCodec(Latin1Encoding()))

45 , m_inHeadSection(true)	45 , m_inHeadSection(true)

46 , m_doneChecking(false)	46 , m_doneChecking(false)

47 {	47 {

48 }	48 }

49	49

50 HTMLMetaCharsetParser::~HTMLMetaCharsetParser()	50 HTMLMetaCharsetParser::~HTMLMetaCharsetParser()

51 {	51 {

52 }	52 }

53	53

	54 static const char charsetString[] = "charset";

	55 static const size_t charsetLength = sizeof("charset") - 1;

	56

	57 String HTMLMetaCharsetParser::extractCharset(const String& value)

	58 {

	59 size_t pos = 0;

	60 unsigned length = value.length();

	61

	62 while (pos < length) {

	63 pos = value.find(charsetString, pos, false);

	64 if (pos == kNotFound)

	65 break;

	66

	67 pos += charsetLength;

	68

	69 // Skip whitespace.

	70 while (pos < length && value[pos] <= ' ')

	71 ++pos;

	72

	73 if (value[pos] != '=')

	74 continue;

	75

	76 ++pos;

	77

	78 while (pos < length && value[pos] <= ' ')

	79 ++pos;

	80

	81 char quoteMark = 0;

	82 if (pos < length && (value[pos] == '"' || value[pos] == '\'')) {

	83 quoteMark = static_cast<char>(value[pos++]);

	84 ASSERT(!(quoteMark & 0x80));

	85 }

	86

	87 if (pos == length)

	88 break;

	89

	90 unsigned end = pos;

	91 while (end < length && ((quoteMark && value[end] != quoteMark) || (!quoteMark && value[end] > ' ' && value[end] != '"' && value[end] != '\'' && value[end] != ';')))

	92 ++end;

	93

	94 if (quoteMark && (end == length))

	95 break; // Close quote not found.

	96

	97 return value.substring(pos, end - pos);

	98 }

	99

	100 return "";

	101 }

	102

54 bool HTMLMetaCharsetParser::processMeta()	103 bool HTMLMetaCharsetParser::processMeta()

55 {	104 {

56 const HTMLToken::AttributeList& tokenAttributes = m_token.attributes();	105 const HTMLToken::AttributeList& tokenAttributes = m_token.attributes();

57 HTMLAttributeList attributes;	106 AttributeList attributes;

58 for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin(); iter != tokenAttributes.end(); ++iter) {	107 for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin(); iter != tokenAttributes.end(); ++iter) {

59 String attributeName = attemptStaticStringCreation(iter->name, Likely8Bit);	108 String attributeName = StringImpl::create8BitIfPossible(iter->name);

60 String attributeValue = StringImpl::create8BitIfPossible(iter->value);	109 String attributeValue = StringImpl::create8BitIfPossible(iter->value);

61 attributes.append(std::make_pair(attributeName, attributeValue));	110 attributes.append(std::make_pair(attributeName, attributeValue));

62 }	111 }

63	112

64 m_encoding = encodingFromMetaAttributes(attributes);	113 m_encoding = encodingFromMetaAttributes(attributes);

65 return m_encoding.isValid();	114 return m_encoding.isValid();

66 }	115 }

67	116

	117 WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeList& attributes)

	118 {

	119 bool gotPragma = false;

	120 Mode mode = None;

	121 String charset;

	122

	123 for (AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {

	124 const AtomicString& attributeName = AtomicString(iter->first);

	125 const String& attributeValue = iter->second;

	126

	127 if (attributeName == http_equivAttr) {

	128 if (equalIgnoringCase(attributeValue, "content-type"))

	129 gotPragma = true;

	130 } else if (charset.isEmpty()) {

	131 if (attributeName == charsetAttr) {

	132 charset = attributeValue;

	133 mode = Charset;

	134 } else if (attributeName == contentAttr) {

	135 charset = extractCharset(attributeValue);

	136 if (charset.length())

	137 mode = Pragma;

	138 }

	139 }

	140 }

	141

	142 if (mode == Charset || (mode == Pragma && gotPragma))

	143 return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));

	144

	145 return WTF::TextEncoding();

	146 }

	147

68 static const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over.	148 static const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over.

69	149

70 bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)	150 bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, size_t length)

71 {	151 {

72 if (m_doneChecking)	152 if (m_doneChecking)

73 return true;	153 return true;

74	154

75 ASSERT(!m_encoding.isValid());	155 ASSERT(!m_encoding.isValid());

76	156

77 // We still don't have an encoding, and are in the head.	157 // We still don't have an encoding, and are in the head.

(...skipping 12 matching lines...) Expand all Loading...
90	170

91 // Since many sites have charset declarations after <body> or other tags	171 // Since many sites have charset declarations after <body> or other tags

92 // that are disallowed in <head>, we don't bail out until we've checked at	172 // that are disallowed in <head>, we don't bail out until we've checked at

93 // least bytesToCheckUnconditionally bytes of input.	173 // least bytesToCheckUnconditionally bytes of input.

94	174

95 m_input.append(SegmentedString(m_assumedCodec->decode(data, length)));	175 m_input.append(SegmentedString(m_assumedCodec->decode(data, length)));

96	176

97 while (m_tokenizer->nextToken(m_input, m_token)) {	177 while (m_tokenizer->nextToken(m_input, m_token)) {

98 bool end = m_token.type() == HTMLToken::EndTag;	178 bool end = m_token.type() == HTMLToken::EndTag;

99 if (end || m_token.type() == HTMLToken::StartTag) {	179 if (end || m_token.type() == HTMLToken::StartTag) {

100 String tagName = attemptStaticStringCreation(m_token.name(), Likely8Bit);	180 AtomicString tagName(m_token.name());

101 if (!end) {	181 if (!end) {

102 m_tokenizer->updateStateFor(tagName);	182 m_tokenizer->updateStateFor(tagName);

103 if (threadSafeMatch(tagName, metaTag) && processMeta()) {	183 if (tagName == metaTag && processMeta()) {

104 m_doneChecking = true;	184 m_doneChecking = true;

105 return true;	185 return true;

106 }	186 }

107 }	187 }

108	188

109 if (!threadSafeMatch(tagName, scriptTag) && !threadSafeMatch(tagName, noscriptTag)	189 if (tagName != scriptTag && tagName != noscriptTag

110 && !threadSafeMatch(tagName, styleTag) && !threadSafeMatch(tagName, linkTag)	190 && tagName != styleTag && tagName != linkTag

111 && !threadSafeMatch(tagName, metaTag) && !threadSafeMatch(tagName, objectTag)	191 && tagName != metaTag && tagName != objectTag

112 && !threadSafeMatch(tagName, titleTag) && !threadSafeMatch(tagName, baseTag)	192 && tagName != titleTag && tagName != baseTag

113 && (end || !threadSafeMatch(tagName, htmlTag)) && (end || !threadSafeMatch(tagName, headTag))) {	193 && (end || tagName != htmlTag) && (end || tagName != headTag)) {

114 m_inHeadSection = false;	194 m_inHeadSection = false;

115 }	195 }

116 }	196 }

117	197

118 if (!m_inHeadSection && m_input.numberOfCharactersConsumed() >= bytesToCheckUnconditionally) {	198 if (!m_inHeadSection && m_input.numberOfCharactersConsumed() >= bytesToCheckUnconditionally) {

119 m_doneChecking = true;	199 m_doneChecking = true;

120 return true;	200 return true;

121 }	201 }

122	202

123 m_token.clear();	203 m_token.clear();

124 }	204 }

125	205

126 return false;	206 return false;

127 }	207 }

128	208

129 }	209 }

OLD	NEW

« no previous file with comments | « Source/core/html/parser/HTMLMetaCharsetParser.h ('k') | Source/core/html/parser/HTMLParserIdioms.h » ('j') | no next file with comments »