Source/core/html/parser/HTMLMetaCharsetParser.cpp - Issue 74513003: Moved text decoding to the parser thread

Side by Side Diff: Source/core/html/parser/HTMLMetaCharsetParser.cpp

Issue 74513003: Moved text decoding to the parser thread (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@parserthread_step25

Patch Set: Removed AtomicString from HTMLMetaCharsetParser Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« Source/core/html/parser/HTMLIdentifier.h ('K') | « Source/core/html/parser/HTMLMetaCharsetParser.h ('k') | Source/core/html/parser/HTMLPreloadScanner.cpp » ('j') | Source/core/html/parser/HTMLTokenizer.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2010 Google Inc. All Rights Reserved.	2 * Copyright (C) 2010 Google Inc. All Rights Reserved.

3 *	3 *

4 * Redistribution and use in source and binary forms, with or without	4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions	5 * modification, are permitted provided that the following conditions

6 * are met:	6 * are met:

7 * 1. Redistributions of source code must retain the above copyright	7 * 1. Redistributions of source code must retain the above copyright

8 * notice, this list of conditions and the following disclaimer.	8 * notice, this list of conditions and the following disclaimer.

9 * 2. Redistributions in binary form must reproduce the above copyright	9 * 2. Redistributions in binary form must reproduce the above copyright

10 * notice, this list of conditions and the following disclaimer in the	10 * notice, this list of conditions and the following disclaimer in the

(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
99 }	99 }

100	100

101 return "";	101 return "";

102 }	102 }

103	103

104 bool HTMLMetaCharsetParser::processMeta()	104 bool HTMLMetaCharsetParser::processMeta()

105 {	105 {

106 const HTMLToken::AttributeList& tokenAttributes = m_token.attributes();	106 const HTMLToken::AttributeList& tokenAttributes = m_token.attributes();

107 AttributeList attributes;	107 AttributeList attributes;

108 for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin() ; iter != tokenAttributes.end(); ++iter) {	108 for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin() ; iter != tokenAttributes.end(); ++iter) {

109 String attributeName = StringImpl::create8BitIfPossible(iter->name);	109 HTMLIdentifier attributeName(iter->name, Likely8Bit);

110 String attributeValue = StringImpl::create8BitIfPossible(iter->value);	110 String attributeValue = StringImpl::create8BitIfPossible(iter->value);

111 attributes.append(std::make_pair(attributeName, attributeValue));	111 attributes.append(std::make_pair(attributeName, attributeValue));

112 }	112 }

113	113

114 m_encoding = encodingFromMetaAttributes(attributes);	114 m_encoding = encodingFromMetaAttributes(attributes);

115 return m_encoding.isValid();	115 return m_encoding.isValid();

116 }	116 }

117	117

118 WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeList& attributes)	118 WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeList& attributes)

119 {	119 {

120 bool gotPragma = false;	120 bool gotPragma = false;

121 Mode mode = None;	121 Mode mode = None;

122 String charset;	122 String charset;

123	123

124 for (AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {	124 for (AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {

125 const AtomicString& attributeName = iter->first;	125 const HTMLIdentifier& attributeName = iter->first;

126 const String& attributeValue = iter->second;	126 const String& attributeValue = iter->second;

127	127

128 if (attributeName == http_equivAttr) {	128 if (threadSafeMatch(attributeName, http_equivAttr)) {

129 if (equalIgnoringCase(attributeValue, "content-type"))	129 if (equalIgnoringCase(attributeValue, "content-type"))

130 gotPragma = true;	130 gotPragma = true;

131 } else if (charset.isEmpty()) {	131 } else if (charset.isEmpty()) {

132 if (attributeName == charsetAttr) {	132 if (threadSafeMatch(attributeName, charsetAttr)) {

133 charset = attributeValue;	133 charset = attributeValue;

134 mode = Charset;	134 mode = Charset;

135 } else if (attributeName == contentAttr) {	135 } else if (threadSafeMatch(attributeName, contentAttr)) {

136 charset = extractCharset(attributeValue);	136 charset = extractCharset(attributeValue);

137 if (charset.length())	137 if (charset.length())

138 mode = Pragma;	138 mode = Pragma;

139 }	139 }

140 }	140 }

141 }	141 }

142	142

143 if (mode == Charset || (mode == Pragma && gotPragma))	143 if (mode == Charset || (mode == Pragma && gotPragma))

144 return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));	144 return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));

145	145

(...skipping 25 matching lines...) Expand all Loading...
171	171

172 // Since many sites have charset declarations after <body> or other tags	172 // Since many sites have charset declarations after <body> or other tags

173 // that are disallowed in <head>, we don't bail out until we've checked at	173 // that are disallowed in <head>, we don't bail out until we've checked at

174 // least bytesToCheckUnconditionally bytes of input.	174 // least bytesToCheckUnconditionally bytes of input.

175	175

176 m_input.append(SegmentedString(m_assumedCodec->decode(data, length)));	176 m_input.append(SegmentedString(m_assumedCodec->decode(data, length)));

177	177

178 while (m_tokenizer->nextToken(m_input, m_token)) {	178 while (m_tokenizer->nextToken(m_input, m_token)) {

179 bool end = m_token.type() == HTMLToken::EndTag;	179 bool end = m_token.type() == HTMLToken::EndTag;

180 if (end || m_token.type() == HTMLToken::StartTag) {	180 if (end || m_token.type() == HTMLToken::StartTag) {

181 AtomicString tagName(m_token.name());	181 HTMLIdentifier tagName(m_token.name(), Likely8Bit);

182 if (!end) {	182 if (!end) {

183 m_tokenizer->updateStateFor(tagName);	183 m_tokenizer->updateStateFor(tagName);

184 if (tagName == metaTag && processMeta()) {	184 if (tagName == metaTag && processMeta()) {

185 m_doneChecking = true;	185 m_doneChecking = true;

186 return true;	186 return true;

187 }	187 }

188 }	188 }

189	189

190 if (tagName != scriptTag && tagName != noscriptTag	190 if (tagName != scriptTag && tagName != noscriptTag

191 && tagName != styleTag && tagName != linkTag	191 && tagName != styleTag && tagName != linkTag

192 && tagName != metaTag && tagName != objectTag	192 && tagName != metaTag && tagName != objectTag

193 && tagName != titleTag && tagName != baseTag	193 && tagName != titleTag && tagName != baseTag

194 && (end || tagName != htmlTag) && (end || tagName != headTag)) {	194 && (end || tagName != htmlTag) && (end || tagName != headTag)) {

195 m_inHeadSection = false;	195 m_inHeadSection = false;

196 }	196 }

197 }	197 }

198	198

199 if (!m_inHeadSection && m_input.numberOfCharactersConsumed() >= bytesToCheckUnconditionally) {	199 if (!m_inHeadSection && m_input.numberOfCharactersConsumed() >= bytesToCheckUnconditionally) {

200 m_doneChecking = true;	200 m_doneChecking = true;

201 return true;	201 return true;

202 }	202 }

203	203

204 m_token.clear();	204 m_token.clear();

205 }	205 }

206	206

207 return false;	207 return false;

208 }	208 }

209	209

210 }	210 }

OLD	NEW