Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. | 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 12 matching lines...) | |
| 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 */ | 29 */ |
| 30 | 30 |
| 31 #include "platform/text/TextEncodingDetector.h" | 31 #include "platform/text/TextEncodingDetector.h" |
| 32 | 32 |
| 33 #include "platform/weborigin/KURL.h" | |
| 33 #include "third_party/ced/src/compact_enc_det/compact_enc_det.h" | 34 #include "third_party/ced/src/compact_enc_det/compact_enc_det.h" |
| 34 #include "wtf/text/TextEncoding.h" | 35 #include "wtf/text/TextEncoding.h" |
| 35 | 36 |
| 36 namespace blink { | 37 namespace blink { |
| 37 | 38 |
| 38 bool detectTextEncoding(const char* data, | 39 bool detectTextEncoding(const char* data, |
| 39 size_t length, | 40 size_t length, |
| 40 const char* hintEncodingName, | 41 const char* hintEncodingName, |
| 41 const char* hintUrl, | 42 const char* hintUrl, |
|
tkent
2017/03/29 22:53:58
Let's make the |hintUrl| argument a KURL later.
Jinsuk Kim
2017/03/29 23:14:36
Yes, that would help avoid recreating the KURL object more…
| |
| 42 const char* hintUserLanguage, | 43 const char* hintUserLanguage, |
| 43 WTF::TextEncoding* detectedEncoding) { | 44 WTF::TextEncoding* detectedEncoding) { |
| 44 *detectedEncoding = WTF::TextEncoding(); | 45 *detectedEncoding = WTF::TextEncoding(); |
| 45 Language language; | 46 Language language; |
| 46 LanguageFromCode(hintUserLanguage, &language); | 47 LanguageFromCode(hintUserLanguage, &language); |
| 47 int consumedBytes; | 48 int consumedBytes; |
| 48 bool isReliable; | 49 bool isReliable; |
| 49 Encoding encoding = CompactEncDet::DetectEncoding( | 50 Encoding encoding = CompactEncDet::DetectEncoding( |
| 50 data, length, hintUrl, nullptr, nullptr, | 51 data, length, hintUrl, nullptr, nullptr, |
| 51 EncodingNameAliasToEncoding(hintEncodingName), language, | 52 EncodingNameAliasToEncoding(hintEncodingName), language, |
| 52 CompactEncDet::WEB_CORPUS, | 53 CompactEncDet::WEB_CORPUS, |
| 53 false, // Include 7-bit encodings to detect ISO-2022-JP | 54 false, // Include 7-bit encodings to detect ISO-2022-JP |
| 54 &consumedBytes, &isReliable); | 55 &consumedBytes, &isReliable); |
| 55 | 56 |
| 56 // Should return false if the detected encoding is UTF8. This helps prevent | 57 // Should return false if the detected encoding is UTF8. This helps prevent |
| 57 // modern web sites from neglecting proper encoding labelling and simply | 58 // modern web sites from neglecting proper encoding labelling and simply |
| 58 // relying on browser-side encoding detection. Encoding detection is supposed | 59 // relying on browser-side encoding detection. Encoding detection is supposed |
| 59 // to work for web sites with legacy encoding only. Detection failure leads | 60 // to work for web sites with legacy encoding only (so this doesn't have to |
| 60 // |TextResourceDecoder| to use its default encoding determined from system | 61 // be applied to local file resources). |
| 61 // locale or TLD. | 62 // Detection failure leads |TextResourceDecoder| to use its default encoding |
| 62 if (encoding == UNKNOWN_ENCODING || encoding == UTF8) | 63 // determined from system locale or TLD. |
| 64 String protocol = hintUrl ? KURL(ParsedURLString, hintUrl).protocol() : ""; | |
| 65 if (encoding == UNKNOWN_ENCODING || (protocol != "file" && encoding == UTF8)) | |
| 63 return false; | 66 return false; |
| 64 | 67 |
| 65 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); | 68 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); |
| 66 return true; | 69 return true; |
| 67 } | 70 } |
| 68 | 71 |
| 69 } // namespace blink | 72 } // namespace blink |
| OLD | NEW |