Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(225)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp

Issue 2386893002: Reformat comments in core/html/parser (Closed)
Patch Set: self review Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
4 rights reserved.
4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) 5 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)
5 6
6 This library is free software; you can redistribute it and/or 7 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 8 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 9 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 10 version 2 of the License, or (at your option) any later version.
10 11
11 This library is distributed in the hope that it will be useful, 12 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
78 char b4, 79 char b4,
79 char b5, 80 char b5,
80 char b6, 81 char b6,
81 char b7, 82 char b7,
82 char b8, 83 char b8,
83 char b9) { 84 char b9) {
84 return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && 85 return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 &&
85 p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9; 86 p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9;
86 } 87 }
87 88
88 // You might think we should put these find functions elsewhere, perhaps with the 89 // You might think we should put these find functions elsewhere, perhaps with
89 // similar functions that operate on UChar, but arguably only the decoder has 90 // the similar functions that operate on UChar, but arguably only the decoder
90 // a reason to process strings of char rather than UChar. 91 // has a reason to process strings of char rather than UChar.
91 92
92 static int find(const char* subject, size_t subjectLength, const char* target) { 93 static int find(const char* subject, size_t subjectLength, const char* target) {
93 size_t targetLength = strlen(target); 94 size_t targetLength = strlen(target);
94 if (targetLength > subjectLength) 95 if (targetLength > subjectLength)
95 return -1; 96 return -1;
96 for (size_t i = 0; i <= subjectLength - targetLength; ++i) { 97 for (size_t i = 0; i <= subjectLength - targetLength; ++i) {
97 bool match = true; 98 bool match = true;
98 for (size_t j = 0; j < targetLength; ++j) { 99 for (size_t j = 0; j < targetLength; ++j) {
99 if (subject[i + j] != target[j]) { 100 if (subject[i + j] != target[j]) {
100 match = false; 101 match = false;
(...skipping 21 matching lines...) Expand all
122 if (equalIgnoringCase(mimeType, "text/html")) 123 if (equalIgnoringCase(mimeType, "text/html"))
123 return HTMLContent; 124 return HTMLContent;
124 if (DOMImplementation::isXMLMIMEType(mimeType)) 125 if (DOMImplementation::isXMLMIMEType(mimeType))
125 return XMLContent; 126 return XMLContent;
126 return PlainTextContent; 127 return PlainTextContent;
127 } 128 }
128 129
129 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding( 130 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(
130 ContentType contentType, 131 ContentType contentType,
131 const WTF::TextEncoding& specifiedDefaultEncoding) { 132 const WTF::TextEncoding& specifiedDefaultEncoding) {
132 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII 133 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8
133 // for text/xml. This matches Firefox. 134 // instead of US-ASCII for text/xml. This matches Firefox.
134 if (contentType == XMLContent) 135 if (contentType == XMLContent)
135 return UTF8Encoding(); 136 return UTF8Encoding();
136 if (!specifiedDefaultEncoding.isValid()) 137 if (!specifiedDefaultEncoding.isValid())
137 return Latin1Encoding(); 138 return Latin1Encoding();
138 return specifiedDefaultEncoding; 139 return specifiedDefaultEncoding;
139 } 140 }
140 141
141 TextResourceDecoder::TextResourceDecoder( 142 TextResourceDecoder::TextResourceDecoder(
142 const String& mimeType, 143 const String& mimeType,
143 const WTF::TextEncoding& specifiedDefaultEncoding, 144 const WTF::TextEncoding& specifiedDefaultEncoding,
(...skipping 10 matching lines...) Expand all
154 m_sawError(false), 155 m_sawError(false),
155 m_encodingDetectionOption(encodingDetectionOption) { 156 m_encodingDetectionOption(encodingDetectionOption) {
156 if (m_encodingDetectionOption == AlwaysUseUTF8ForText) 157 if (m_encodingDetectionOption == AlwaysUseUTF8ForText)
157 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding()); 158 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding());
158 } 159 }
159 160
160 TextResourceDecoder::~TextResourceDecoder() {} 161 TextResourceDecoder::~TextResourceDecoder() {}
161 162
162 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, 163 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding,
163 EncodingSource source) { 164 EncodingSource source) {
164 // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings). 165 // In case the encoding didn't exist, we keep the old one (helps some sites
166 // specifying invalid encodings).
165 if (!encoding.isValid()) 167 if (!encoding.isValid())
166 return; 168 return;
167 169
168 // When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR), 170 // When encoding comes from meta tag (i.e. it cannot be XML files sent via
169 // treat x-user-defined as windows-1252 (bug 18270) 171 // XHR), treat x-user-defined as windows-1252 (bug 18270)
170 if (source == EncodingFromMetaTag && 172 if (source == EncodingFromMetaTag &&
171 !strcasecmp(encoding.name(), "x-user-defined")) 173 !strcasecmp(encoding.name(), "x-user-defined"))
172 m_encoding = "windows-1252"; 174 m_encoding = "windows-1252";
173 else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || 175 else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader ||
174 source == EncodingFromCSSCharset) 176 source == EncodingFromCSSCharset)
175 m_encoding = encoding.closestByteBasedEquivalent(); 177 m_encoding = encoding.closestByteBasedEquivalent();
176 else 178 else
177 m_encoding = encoding; 179 m_encoding = encoding;
178 180
179 m_codec.reset(); 181 m_codec.reset();
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
213 while (end < len && str[end] != quoteMark) 215 while (end < len && str[end] != quoteMark)
214 ++end; 216 ++end;
215 if (end >= len) 217 if (end >= len)
216 return -1; 218 return -1;
217 219
218 encodingLength = end - pos; 220 encodingLength = end - pos;
219 return pos; 221 return pos;
220 } 222 }
221 223
222 size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) { 224 size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) {
223 // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding. 225 // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure
224 // We let it override even a user-chosen encoding. 226 // sign of a Unicode encoding. We let it override even a user-chosen encoding.
225 ASSERT(!m_checkedForBOM); 227 ASSERT(!m_checkedForBOM);
226 228
227 size_t lengthOfBOM = 0; 229 size_t lengthOfBOM = 0;
228 230
229 size_t bufferLength = m_buffer.size(); 231 size_t bufferLength = m_buffer.size();
230 232
231 size_t buf1Len = bufferLength; 233 size_t buf1Len = bufferLength;
232 size_t buf2Len = len; 234 size_t buf2Len = len;
233 const unsigned char* buf1 = 235 const unsigned char* buf1 =
234 reinterpret_cast<const unsigned char*>(m_buffer.data()); 236 reinterpret_cast<const unsigned char*>(m_buffer.data());
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
330 332
331 movedDataToBuffer = true; 333 movedDataToBuffer = true;
332 334
333 const char* ptr = m_buffer.data(); 335 const char* ptr = m_buffer.data();
334 const char* pEnd = ptr + m_buffer.size(); 336 const char* pEnd = ptr + m_buffer.size();
335 337
336 // Is there enough data available to check for XML declaration? 338 // Is there enough data available to check for XML declaration?
337 if (m_buffer.size() < minimumLengthOfXMLDeclaration) 339 if (m_buffer.size() < minimumLengthOfXMLDeclaration)
338 return false; 340 return false;
339 341
340 // Handle XML declaration, which can have encoding in it. This encoding is honored even for HTML documents. 342 // Handle XML declaration, which can have encoding in it. This encoding is
341 // It is an error for an XML declaration not to be at the start of an XML document, and it is ignored in HTML documents in such case. 343 // honored even for HTML documents. It is an error for an XML declaration not
344 // to be at the start of an XML document, and it is ignored in HTML documents
345 // in such case.
342 if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) { 346 if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) {
343 const char* xmlDeclarationEnd = ptr; 347 const char* xmlDeclarationEnd = ptr;
344 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>') 348 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>')
345 ++xmlDeclarationEnd; 349 ++xmlDeclarationEnd;
346 if (xmlDeclarationEnd == pEnd) 350 if (xmlDeclarationEnd == pEnd)
347 return false; 351 return false;
348 // No need for +1, because we have an extra "?" to lose at the end of XML declaration. 352 // No need for +1, because we have an extra "?" to lose at the end of XML
353 // declaration.
349 int len = 0; 354 int len = 0;
350 int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len); 355 int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len);
351 if (pos != -1) 356 if (pos != -1)
352 setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader); 357 setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader);
353 // continue looking for a charset - it may be specified in an HTTP-Equiv meta 358 // continue looking for a charset - it may be specified in an HTTP-Equiv
359 // meta
354 } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) { 360 } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) {
355 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); 361 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);
356 } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) { 362 } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) {
357 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); 363 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);
358 } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) { 364 } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) {
359 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); 365 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding);
360 } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) { 366 } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) {
361 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); 367 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding);
362 } 368 }
363 369
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
414 } 420 }
415 DCHECK_LE(lengthOfBOM, m_buffer.size() + len); 421 DCHECK_LE(lengthOfBOM, m_buffer.size() + len);
416 422
417 bool movedDataToBuffer = false; 423 bool movedDataToBuffer = false;
418 424
419 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { 425 if (m_contentType == CSSContent && !m_checkedForCSSCharset) {
420 if (!checkForCSSCharset(data, len, movedDataToBuffer)) 426 if (!checkForCSSCharset(data, len, movedDataToBuffer))
421 return emptyString(); 427 return emptyString();
422 } 428 }
423 429
424 // We check XML declaration in HTML content only if there is enough data available 430 // We check XML declaration in HTML content only if there is enough data
431 // available
425 if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || 432 if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) ||
426 m_contentType == XMLContent) && 433 m_contentType == XMLContent) &&
427 !m_checkedForXMLCharset) { 434 !m_checkedForXMLCharset) {
428 if (!checkForXMLCharset(data, len, movedDataToBuffer)) 435 if (!checkForXMLCharset(data, len, movedDataToBuffer))
429 return emptyString(); 436 return emptyString();
430 } 437 }
431 438
432 const char* dataForDecode = data + lengthOfBOM; 439 const char* dataForDecode = data + lengthOfBOM;
433 size_t lengthForDecode = len - lengthOfBOM; 440 size_t lengthForDecode = len - lengthOfBOM;
434 441
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
485 String result = m_codec->decode( 492 String result = m_codec->decode(
486 m_buffer.data(), m_buffer.size(), FetchEOF, 493 m_buffer.data(), m_buffer.size(), FetchEOF,
487 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); 494 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
488 m_buffer.clear(); 495 m_buffer.clear();
489 m_codec.reset(); 496 m_codec.reset();
490 m_checkedForBOM = false; // Skip BOM again when re-decoding. 497 m_checkedForBOM = false; // Skip BOM again when re-decoding.
491 return result; 498 return result;
492 } 499 }
493 500
494 } // namespace blink 501 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698