Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. |
| 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
| 5 | 5 |
| 6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
| 7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
| 8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
| 9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
| 10 | 10 |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 21 | 21 |
| 22 #include "core/html/parser/TextResourceDecoder.h" | 22 #include "core/html/parser/TextResourceDecoder.h" |
| 23 | 23 |
| 24 #include "core/HTMLNames.h" | 24 #include "core/HTMLNames.h" |
| 25 #include "core/dom/DOMImplementation.h" | 25 #include "core/dom/DOMImplementation.h" |
| 26 #include "core/html/parser/HTMLMetaCharsetParser.h" | 26 #include "core/html/parser/HTMLMetaCharsetParser.h" |
| 27 #include "platform/text/TextEncodingDetector.h" | 27 #include "platform/text/TextEncodingDetector.h" |
| 28 #include "wtf/StringExtras.h" | 28 #include "wtf/StringExtras.h" |
| 29 #include "wtf/text/TextCodec.h" | 29 #include "wtf/text/TextCodec.h" |
| 30 #include "wtf/text/TextEncodingRegistry.h" | 30 #include "wtf/text/TextEncodingRegistry.h" |
| 31 #include "wtf/text/UTF8.h" | |
| 31 | 32 |
| 32 using namespace WTF; | 33 using namespace WTF; |
| 33 | 34 |
| 34 namespace blink { | 35 namespace blink { |
| 35 | 36 |
| 36 using namespace HTMLNames; | 37 using namespace HTMLNames; |
| 37 | 38 |
| 38 const int minimumLengthOfXMLDeclaration = 8; | 39 const int minimumLengthOfXMLDeclaration = 8; |
| 39 | 40 |
| 40 static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4) | 41 static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4) |
| (...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 395 memcpy(m_buffer.data() + oldSize, data, len); | 396 memcpy(m_buffer.data() + oldSize, data, len); |
| 396 } | 397 } |
| 397 | 398 |
| 398 dataForDecode = m_buffer.data() + lengthOfBOM; | 399 dataForDecode = m_buffer.data() + lengthOfBOM; |
| 399 lengthForDecode = m_buffer.size() - lengthOfBOM; | 400 lengthForDecode = m_buffer.size() - lengthOfBOM; |
| 400 } | 401 } |
| 401 | 402 |
| 402 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) | 403 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) |
| 403 checkForMetaCharset(dataForDecode, lengthForDecode); | 404 checkForMetaCharset(dataForDecode, lengthForDecode); |
| 404 | 405 |
| 405 if (shouldAutoDetect()) { | 406 detectTextEncoding(data, len); |
| 406 WTF::TextEncoding detectedEncoding; | |
| 407 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) | |
| 408 setEncoding(detectedEncoding, EncodingFromContentSniffing); | |
| 409 } | |
| 410 | 407 |
| 411 ASSERT(m_encoding.isValid()); | 408 ASSERT(m_encoding.isValid()); |
| 412 | 409 |
| 413 if (!m_codec) | 410 if (!m_codec) |
| 414 m_codec = newTextCodec(m_encoding); | 411 m_codec = newTextCodec(m_encoding); |
| 415 | 412 |
| 416 String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 413 String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 417 | 414 |
| 418 m_buffer.clear(); | 415 m_buffer.clear(); |
| 419 return result; | 416 return result; |
| 420 } | 417 } |
| 421 | 418 |
| 419 void TextResourceDecoder::detectTextEncoding(const char* data, size_t len) | |
| 420 { | |
| 421 if (shouldAutoDetect()) { | |
| 422 WTF::TextEncoding detectedEncoding; | |
| 423 if (detectTextEncodingUniversal(data, len, m_hintEncoding, &detectedEncoding)) | |
| 424 setEncoding(detectedEncoding, EncodingFromContentSniffing); | |
| 425 return; | |
| 426 } | |
| 427 if ((m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding))) { | |
|
jungshik at Google
2016/03/24 06:15:08
nit: The above condition is shared by shouldAuto
Jinsuk Kim
2016/03/25 02:15:42
Done.
| |
| 428 if (WTF::Unicode::isUTF8Encoded(data, len)) | |
| 429 setEncoding(UTF8Encoding(), EncodingFromContentSniffing); | |
|
jungshik at Google
2016/03/24 06:15:08
Given that isUTF8Encoded excludes 'ASCII' (by chec
Jinsuk Kim
2016/03/25 02:15:42
Makes sense. Done.
| |
| 430 } | |
| 431 } | |
| 432 | |
| 422 String TextResourceDecoder::flush() | 433 String TextResourceDecoder::flush() |
| 423 { | 434 { |
| 424 // If we can not identify the encoding even after a document is completely | 435 // If we can not identify the encoding even after a document is completely |
| 425 // loaded, we need to detect the encoding if other conditions for | 436 // loaded, we need to detect the encoding if other conditions for |
| 426 // autodetection is satisfied. | 437 // autodetection is satisfied. |
| 427 if (m_buffer.size() && shouldAutoDetect() | 438 if (m_buffer.size() |
| 428 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) { | 439 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) { |
| 429 WTF::TextEncoding detectedEncoding; | 440 detectTextEncoding(m_buffer.data(), m_buffer.size()); |
| 430 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding)) | |
| 431 setEncoding(detectedEncoding, EncodingFromContentSniffing); | |
| 432 } | 441 } |
| 433 | 442 |
| 434 if (!m_codec) | 443 if (!m_codec) |
| 435 m_codec = newTextCodec(m_encoding); | 444 m_codec = newTextCodec(m_encoding); |
| 436 | 445 |
| 437 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 446 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 438 m_buffer.clear(); | 447 m_buffer.clear(); |
| 439 m_codec.clear(); | 448 m_codec.clear(); |
| 440 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 449 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
| 441 return result; | 450 return result; |
| 442 } | 451 } |
| 443 | 452 |
| 444 } // namespace blink | 453 } // namespace blink |
| OLD | NEW |