Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp

Issue 2786913002: Replace the type of hint url for blink::detectTextEncoding (Closed)
Patch Set: const KURL Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
4 rights reserved. 4 rights reserved.
5 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) 5 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)
6 6
7 This library is free software; you can redistribute it and/or 7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public 8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either 9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version. 10 version 2 of the License, or (at your option) any later version.
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
135 return UTF8Encoding(); 135 return UTF8Encoding();
136 if (!specifiedDefaultEncoding.isValid()) 136 if (!specifiedDefaultEncoding.isValid())
137 return Latin1Encoding(); 137 return Latin1Encoding();
138 return specifiedDefaultEncoding; 138 return specifiedDefaultEncoding;
139 } 139 }
140 140
141 TextResourceDecoder::TextResourceDecoder( 141 TextResourceDecoder::TextResourceDecoder(
142 const String& mimeType, 142 const String& mimeType,
143 const WTF::TextEncoding& specifiedDefaultEncoding, 143 const WTF::TextEncoding& specifiedDefaultEncoding,
144 EncodingDetectionOption encodingDetectionOption, 144 EncodingDetectionOption encodingDetectionOption,
145 const String& url) 145 const KURL& hintUrl)
146 : m_contentType(determineContentType(mimeType)), 146 : m_contentType(determineContentType(mimeType)),
147 m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)), 147 m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)),
148 m_source(DefaultEncoding), 148 m_source(DefaultEncoding),
149 m_hintEncoding(0), 149 m_hintEncoding(0),
150 m_hintUrl(url.utf8()), 150 m_hintUrl(hintUrl),
151 m_checkedForBOM(false), 151 m_checkedForBOM(false),
152 m_checkedForCSSCharset(false), 152 m_checkedForCSSCharset(false),
153 m_checkedForXMLCharset(false), 153 m_checkedForXMLCharset(false),
154 m_checkedForMetaCharset(false), 154 m_checkedForMetaCharset(false),
155 m_useLenientXMLDecoding(false), 155 m_useLenientXMLDecoding(false),
156 m_sawError(false), 156 m_sawError(false),
157 m_encodingDetectionOption(encodingDetectionOption) { 157 m_encodingDetectionOption(encodingDetectionOption) {
158 m_hintLanguage[0] = 0; 158 m_hintLanguage[0] = 0;
159 if (m_encodingDetectionOption == AlwaysUseUTF8ForText) { 159 if (m_encodingDetectionOption == AlwaysUseUTF8ForText) {
160 DCHECK(m_contentType == PlainTextContent && m_encoding == UTF8Encoding()); 160 DCHECK(m_contentType == PlainTextContent && m_encoding == UTF8Encoding());
161 } else if (m_encodingDetectionOption == UseAllAutoDetection) { 161 } else if (m_encodingDetectionOption == UseAllAutoDetection) {
162 // Checking empty URL helps unit testing. Providing defaultLanguage() is 162 // Checking empty URL helps unit testing. Providing defaultLanguage() is
163 // sometimes difficult in tests. 163 // sometimes difficult in tests.
164 if (!url.isEmpty()) { 164 if (!hintUrl.isEmpty()) {
165 // This object is created in the main thread, but used in another thread. 165 // This object is created in the main thread, but used in another thread.
166 // We should not share an AtomicString. 166 // We should not share an AtomicString.
167 AtomicString locale = defaultLanguage(); 167 AtomicString locale = defaultLanguage();
168 if (locale.length() >= 2) { 168 if (locale.length() >= 2) {
169 // defaultLanguage() is always an ASCII string. 169 // defaultLanguage() is always an ASCII string.
170 m_hintLanguage[0] = static_cast<char>(locale[0]); 170 m_hintLanguage[0] = static_cast<char>(locale[0]);
171 m_hintLanguage[1] = static_cast<char>(locale[1]); 171 m_hintLanguage[1] = static_cast<char>(locale[1]);
172 m_hintLanguage[2] = 0; 172 m_hintLanguage[2] = 0;
173 } 173 }
174 } 174 }
(...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 465
466 dataForDecode = m_buffer.data() + lengthOfBOM; 466 dataForDecode = m_buffer.data() + lengthOfBOM;
467 lengthForDecode = m_buffer.size() - lengthOfBOM; 467 lengthForDecode = m_buffer.size() - lengthOfBOM;
468 } 468 }
469 469
470 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) 470 if (m_contentType == HTMLContent && !m_checkedForMetaCharset)
471 checkForMetaCharset(dataForDecode, lengthForDecode); 471 checkForMetaCharset(dataForDecode, lengthForDecode);
472 472
473 if (shouldAutoDetect()) { 473 if (shouldAutoDetect()) {
474 WTF::TextEncoding detectedEncoding; 474 WTF::TextEncoding detectedEncoding;
475 if (detectTextEncoding(data, len, m_hintEncoding, m_hintUrl.data(), 475 if (detectTextEncoding(data, len, m_hintEncoding, m_hintUrl, m_hintLanguage,
476 m_hintLanguage, &detectedEncoding)) 476 &detectedEncoding))
477 setEncoding(detectedEncoding, EncodingFromContentSniffing); 477 setEncoding(detectedEncoding, EncodingFromContentSniffing);
478 } 478 }
479 479
480 DCHECK(m_encoding.isValid()); 480 DCHECK(m_encoding.isValid());
481 481
482 if (!m_codec) 482 if (!m_codec)
483 m_codec = newTextCodec(m_encoding); 483 m_codec = newTextCodec(m_encoding);
484 484
485 String result = m_codec->decode( 485 String result = m_codec->decode(
486 dataForDecode, lengthForDecode, WTF::DoNotFlush, 486 dataForDecode, lengthForDecode, WTF::DoNotFlush,
487 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); 487 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
488 488
489 m_buffer.clear(); 489 m_buffer.clear();
490 return result; 490 return result;
491 } 491 }
492 492
493 String TextResourceDecoder::flush() { 493 String TextResourceDecoder::flush() {
494 // If we cannot identify the encoding even after a document is completely 494 // If we cannot identify the encoding even after a document is completely
495 // loaded, we need to detect the encoding if the other conditions for 495 // loaded, we need to detect the encoding if the other conditions for
496 // autodetection are satisfied. 496 // autodetection are satisfied.
497 if (m_buffer.size() && shouldAutoDetect() && 497 if (m_buffer.size() && shouldAutoDetect() &&
498 ((!m_checkedForXMLCharset && 498 ((!m_checkedForXMLCharset &&
499 (m_contentType == HTMLContent || m_contentType == XMLContent)) || 499 (m_contentType == HTMLContent || m_contentType == XMLContent)) ||
500 (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) { 500 (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) {
501 WTF::TextEncoding detectedEncoding; 501 WTF::TextEncoding detectedEncoding;
502 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, 502 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding,
503 m_hintUrl.data(), m_hintLanguage, &detectedEncoding)) 503 m_hintUrl, m_hintLanguage, &detectedEncoding))
504 setEncoding(detectedEncoding, EncodingFromContentSniffing); 504 setEncoding(detectedEncoding, EncodingFromContentSniffing);
505 } 505 }
506 506
507 if (!m_codec) 507 if (!m_codec)
508 m_codec = newTextCodec(m_encoding); 508 m_codec = newTextCodec(m_encoding);
509 509
510 String result = m_codec->decode( 510 String result = m_codec->decode(
511 m_buffer.data(), m_buffer.size(), WTF::FetchEOF, 511 m_buffer.data(), m_buffer.size(), WTF::FetchEOF,
512 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); 512 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
513 m_buffer.clear(); 513 m_buffer.clear();
514 m_codec.reset(); 514 m_codec.reset();
515 m_checkedForBOM = false; // Skip BOM again when re-decoding. 515 m_checkedForBOM = false; // Skip BOM again when re-decoding.
516 return result; 516 return result;
517 } 517 }
518 518
519 } // namespace blink 519 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698