Index: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
diff --git a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
index 7b74f86b64e4e7c192faaa86dd82ed94154c943d..be1084d5d3d248bbafa37fd22df25591c18dcdd0 100644 |
--- a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
+++ b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
@@ -111,7 +111,7 @@ const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType conten |
return specifiedDefaultEncoding; |
} |
-TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector) |
+TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, EncodingDetectionOption encodingDetectionOption) |
: m_contentType(determineContentType(mimeType)) |
, m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) |
, m_source(DefaultEncoding) |
@@ -122,8 +122,10 @@ TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::Text |
, m_checkedForMetaCharset(false) |
, m_useLenientXMLDecoding(false) |
, m_sawError(false) |
- , m_usesEncodingDetector(usesEncodingDetector) |
+ , m_encodingDetectionOption(encodingDetectionOption) |
{ |
+ if (m_encodingDetectionOption == AlwaysUseUTF8ForText) |
+ ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding()); |
} |
TextResourceDecoder::~TextResourceDecoder() |
@@ -209,23 +211,25 @@ size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) |
unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; |
// Check for the BOM. |
- if (c1 == 0xFF && c2 == 0xFE) { |
- if (c3 || c4) { |
- setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); |
+ if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { |
+ setEncoding(UTF8Encoding(), AutoDetectedEncoding); |
+ lengthOfBOM = 3; |
+ } else if (m_encodingDetectionOption != AlwaysUseUTF8ForText) { |
+ if (c1 == 0xFF && c2 == 0xFE) { |
+ if (c3 || c4) { |
+ setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); |
+ lengthOfBOM = 2; |
+ } else { |
+ setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); |
+ lengthOfBOM = 4; |
+ } |
+ } else if (c1 == 0xFE && c2 == 0xFF) { |
+ setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); |
lengthOfBOM = 2; |
- } else { |
- setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); |
+ } else if (!c1 && !c2 && c3 == 0xFE && c4 == 0xFF) { |
+ setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); |
lengthOfBOM = 4; |
} |
- } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { |
- setEncoding(UTF8Encoding(), AutoDetectedEncoding); |
- lengthOfBOM = 3; |
- } else if (c1 == 0xFE && c2 == 0xFF) { |
- setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); |
- lengthOfBOM = 2; |
- } else if (!c1 && !c2 && c3 == 0xFE && c4 == 0xFF) { |
- setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); |
- lengthOfBOM = 4; |
} |
if (lengthOfBOM || bufferLength + len >= 4) |
@@ -359,7 +363,7 @@ bool TextResourceDecoder::shouldAutoDetect() const |
{ |
// Just checking m_hintEncoding suffices here because it's only set |
// in setHintEncoding when the source is AutoDetectedEncoding. |
- return m_usesEncodingDetector |
+ return m_encodingDetectionOption == UseAllAutoDetection |
&& (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); |
} |