third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp - Issue 2386893002: Reformat comments in core/html/parser

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp

Issue 2386893002: Reformat comments in core/html/parser (Closed)

Patch Set: self review Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/WebKit/Source/core/html/parser/TextDocumentParser.cpp ('k') | third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)	2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)

3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.	3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All

	4 rights reserved.

4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)	5 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)

5	6

6 This library is free software; you can redistribute it and/or	7 This library is free software; you can redistribute it and/or

7 modify it under the terms of the GNU Library General Public	8 modify it under the terms of the GNU Library General Public

8 License as published by the Free Software Foundation; either	9 License as published by the Free Software Foundation; either

9 version 2 of the License, or (at your option) any later version.	10 version 2 of the License, or (at your option) any later version.

10	11

11 This library is distributed in the hope that it will be useful,	12 This library is distributed in the hope that it will be useful,

12 but WITHOUT ANY WARRANTY; without even the implied warranty of	13 but WITHOUT ANY WARRANTY; without even the implied warranty of

13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU	14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
78 char b4,	79 char b4,

79 char b5,	80 char b5,

80 char b6,	81 char b6,

81 char b7,	82 char b7,

82 char b8,	83 char b8,

83 char b9) {	84 char b9) {

84 return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 &&	85 return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 &&

85 p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9;	86 p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9;

86 }	87 }

87	88

88 // You might think we should put these find functions elsewhere, perhaps with the	89 // You might think we should put these find functions elsewhere, perhaps with

89 // similar functions that operate on UChar, but arguably only the decoder has	90 // the similar functions that operate on UChar, but arguably only the decoder

90 // a reason to process strings of char rather than UChar.	91 // has a reason to process strings of char rather than UChar.

91	92

92 static int find(const char* subject, size_t subjectLength, const char* target) {	93 static int find(const char* subject, size_t subjectLength, const char* target) {

93 size_t targetLength = strlen(target);	94 size_t targetLength = strlen(target);

94 if (targetLength > subjectLength)	95 if (targetLength > subjectLength)

95 return -1;	96 return -1;

96 for (size_t i = 0; i <= subjectLength - targetLength; ++i) {	97 for (size_t i = 0; i <= subjectLength - targetLength; ++i) {

97 bool match = true;	98 bool match = true;

98 for (size_t j = 0; j < targetLength; ++j) {	99 for (size_t j = 0; j < targetLength; ++j) {

99 if (subject[i + j] != target[j]) {	100 if (subject[i + j] != target[j]) {

100 match = false;	101 match = false;

(...skipping 21 matching lines...) Expand all Loading...
122 if (equalIgnoringCase(mimeType, "text/html"))	123 if (equalIgnoringCase(mimeType, "text/html"))

123 return HTMLContent;	124 return HTMLContent;

124 if (DOMImplementation::isXMLMIMEType(mimeType))	125 if (DOMImplementation::isXMLMIMEType(mimeType))

125 return XMLContent;	126 return XMLContent;

126 return PlainTextContent;	127 return PlainTextContent;

127 }	128 }

128	129

129 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(	130 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(

130 ContentType contentType,	131 ContentType contentType,

131 const WTF::TextEncoding& specifiedDefaultEncoding) {	132 const WTF::TextEncoding& specifiedDefaultEncoding) {

132 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII	133 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8

133 // for text/xml. This matches Firefox.	134 // instead of US-ASCII for text/xml. This matches Firefox.

134 if (contentType == XMLContent)	135 if (contentType == XMLContent)

135 return UTF8Encoding();	136 return UTF8Encoding();

136 if (!specifiedDefaultEncoding.isValid())	137 if (!specifiedDefaultEncoding.isValid())

137 return Latin1Encoding();	138 return Latin1Encoding();

138 return specifiedDefaultEncoding;	139 return specifiedDefaultEncoding;

139 }	140 }

140	141

141 TextResourceDecoder::TextResourceDecoder(	142 TextResourceDecoder::TextResourceDecoder(

142 const String& mimeType,	143 const String& mimeType,

143 const WTF::TextEncoding& specifiedDefaultEncoding,	144 const WTF::TextEncoding& specifiedDefaultEncoding,

(...skipping 10 matching lines...) Expand all Loading...
154 m_sawError(false),	155 m_sawError(false),

155 m_encodingDetectionOption(encodingDetectionOption) {	156 m_encodingDetectionOption(encodingDetectionOption) {

156 if (m_encodingDetectionOption == AlwaysUseUTF8ForText)	157 if (m_encodingDetectionOption == AlwaysUseUTF8ForText)

157 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding());	158 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding());

158 }	159 }

159	160

160 TextResourceDecoder::~TextResourceDecoder() {}	161 TextResourceDecoder::~TextResourceDecoder() {}

161	162

162 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding,	163 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding,

163 EncodingSource source) {	164 EncodingSource source) {

164 // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings).	165 // In case the encoding didn't exist, we keep the old one (helps some sites

	166 // specifying invalid encodings).

165 if (!encoding.isValid())	167 if (!encoding.isValid())

166 return;	168 return;

167	169

168 // When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR),	170 // When encoding comes from meta tag (i.e. it cannot be XML files sent via

169 // treat x-user-defined as windows-1252 (bug 18270)	171 // XHR), treat x-user-defined as windows-1252 (bug 18270)

170 if (source == EncodingFromMetaTag &&	172 if (source == EncodingFromMetaTag &&

171 !strcasecmp(encoding.name(), "x-user-defined"))	173 !strcasecmp(encoding.name(), "x-user-defined"))

172 m_encoding = "windows-1252";	174 m_encoding = "windows-1252";

173 else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader ||	175 else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader ||

174 source == EncodingFromCSSCharset)	176 source == EncodingFromCSSCharset)

175 m_encoding = encoding.closestByteBasedEquivalent();	177 m_encoding = encoding.closestByteBasedEquivalent();

176 else	178 else

177 m_encoding = encoding;	179 m_encoding = encoding;

178	180

179 m_codec.reset();	181 m_codec.reset();

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
213 while (end < len && str[end] != quoteMark)	215 while (end < len && str[end] != quoteMark)

214 ++end;	216 ++end;

215 if (end >= len)	217 if (end >= len)

216 return -1;	218 return -1;

217	219

218 encodingLength = end - pos;	220 encodingLength = end - pos;

219 return pos;	221 return pos;

220 }	222 }

221	223

222 size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) {	224 size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) {

223 // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding.	225 // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure

224 // We let it override even a user-chosen encoding.	226 // sign of a Unicode encoding. We let it override even a user-chosen encoding.

225 ASSERT(!m_checkedForBOM);	227 ASSERT(!m_checkedForBOM);

226	228

227 size_t lengthOfBOM = 0;	229 size_t lengthOfBOM = 0;

228	230

229 size_t bufferLength = m_buffer.size();	231 size_t bufferLength = m_buffer.size();

230	232

231 size_t buf1Len = bufferLength;	233 size_t buf1Len = bufferLength;

232 size_t buf2Len = len;	234 size_t buf2Len = len;

233 const unsigned char* buf1 =	235 const unsigned char* buf1 =

234 reinterpret_cast<const unsigned char*>(m_buffer.data());	236 reinterpret_cast<const unsigned char*>(m_buffer.data());

(...skipping 95 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
330	332

331 movedDataToBuffer = true;	333 movedDataToBuffer = true;

332	334

333 const char* ptr = m_buffer.data();	335 const char* ptr = m_buffer.data();

334 const char* pEnd = ptr + m_buffer.size();	336 const char* pEnd = ptr + m_buffer.size();

335	337

336 // Is there enough data available to check for XML declaration?	338 // Is there enough data available to check for XML declaration?

337 if (m_buffer.size() < minimumLengthOfXMLDeclaration)	339 if (m_buffer.size() < minimumLengthOfXMLDeclaration)

338 return false;	340 return false;

339	341

340 // Handle XML declaration, which can have encoding in it. This encoding is honored even for HTML documents.	342 // Handle XML declaration, which can have encoding in it. This encoding is

341 // It is an error for an XML declaration not to be at the start of an XML document, and it is ignored in HTML documents in such case.	343 // honored even for HTML documents. It is an error for an XML declaration not

	344 // to be at the start of an XML document, and it is ignored in HTML documents

	345 // in such case.

342 if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) {	346 if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) {

343 const char* xmlDeclarationEnd = ptr;	347 const char* xmlDeclarationEnd = ptr;

344 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>')	348 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>')

345 ++xmlDeclarationEnd;	349 ++xmlDeclarationEnd;

346 if (xmlDeclarationEnd == pEnd)	350 if (xmlDeclarationEnd == pEnd)

347 return false;	351 return false;

348 // No need for +1, because we have an extra "?" to lose at the end of XML declaration.	352 // No need for +1, because we have an extra "?" to lose at the end of XML

	353 // declaration.

349 int len = 0;	354 int len = 0;

350 int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len);	355 int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len);

351 if (pos != -1)	356 if (pos != -1)

352 setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader);	357 setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader);

353 // continue looking for a charset - it may be specified in an HTTP-Equiv meta	358 // continue looking for a charset - it may be specified in an HTTP-Equiv

	359 // meta

354 } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) {	360 } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) {

355 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);	361 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);

356 } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) {	362 } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) {

357 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);	363 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);

358 } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) {	364 } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) {

359 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding);	365 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding);

360 } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) {	366 } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) {

361 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding);	367 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding);

362 }	368 }

363	369

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
414 }	420 }

415 DCHECK_LE(lengthOfBOM, m_buffer.size() + len);	421 DCHECK_LE(lengthOfBOM, m_buffer.size() + len);

416	422

417 bool movedDataToBuffer = false;	423 bool movedDataToBuffer = false;

418	424

419 if (m_contentType == CSSContent && !m_checkedForCSSCharset) {	425 if (m_contentType == CSSContent && !m_checkedForCSSCharset) {

420 if (!checkForCSSCharset(data, len, movedDataToBuffer))	426 if (!checkForCSSCharset(data, len, movedDataToBuffer))

421 return emptyString();	427 return emptyString();

422 }	428 }

423	429

424 // We check XML declaration in HTML content only if there is enough data available	430 // We check XML declaration in HTML content only if there is enough data

	431 // available

425 if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) ||	432 if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) ||

426 m_contentType == XMLContent) &&	433 m_contentType == XMLContent) &&

427 !m_checkedForXMLCharset) {	434 !m_checkedForXMLCharset) {

428 if (!checkForXMLCharset(data, len, movedDataToBuffer))	435 if (!checkForXMLCharset(data, len, movedDataToBuffer))

429 return emptyString();	436 return emptyString();

430 }	437 }

431	438

432 const char* dataForDecode = data + lengthOfBOM;	439 const char* dataForDecode = data + lengthOfBOM;

433 size_t lengthForDecode = len - lengthOfBOM;	440 size_t lengthForDecode = len - lengthOfBOM;

434	441

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
485 String result = m_codec->decode(	492 String result = m_codec->decode(

486 m_buffer.data(), m_buffer.size(), FetchEOF,	493 m_buffer.data(), m_buffer.size(), FetchEOF,

487 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);	494 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);

488 m_buffer.clear();	495 m_buffer.clear();

489 m_codec.reset();	496 m_codec.reset();

490 m_checkedForBOM = false; // Skip BOM again when re-decoding.	497 m_checkedForBOM = false; // Skip BOM again when re-decoding.

491 return result;	498 return result;

492 }	499 }

493	500

494 } // namespace blink	501 } // namespace blink

OLD	NEW