| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. | 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 12 matching lines...) Expand all Loading... |
| 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 */ | 29 */ |
| 30 | 30 |
| 31 #include "platform/text/TextEncodingDetector.h" | 31 #include "platform/text/TextEncodingDetector.h" |
| 32 | 32 |
| 33 #include "platform/weborigin/KURL.h" |
| 33 #include "third_party/ced/src/compact_enc_det/compact_enc_det.h" | 34 #include "third_party/ced/src/compact_enc_det/compact_enc_det.h" |
| 34 #include "wtf/text/TextEncoding.h" | 35 #include "wtf/text/TextEncoding.h" |
| 35 | 36 |
| 36 namespace blink { | 37 namespace blink { |
| 37 | 38 |
| 38 bool detectTextEncoding(const char* data, | 39 bool detectTextEncoding(const char* data, |
| 39 size_t length, | 40 size_t length, |
| 40 const char* hintEncodingName, | 41 const char* hintEncodingName, |
| 41 const char* hintUrl, | 42 const char* hintUrl, |
| 42 const char* hintUserLanguage, | 43 const char* hintUserLanguage, |
| 43 WTF::TextEncoding* detectedEncoding) { | 44 WTF::TextEncoding* detectedEncoding) { |
| 44 *detectedEncoding = WTF::TextEncoding(); | 45 *detectedEncoding = WTF::TextEncoding(); |
| 45 Language language; | 46 Language language; |
| 46 LanguageFromCode(hintUserLanguage, &language); | 47 LanguageFromCode(hintUserLanguage, &language); |
| 47 int consumedBytes; | 48 int consumedBytes; |
| 48 bool isReliable; | 49 bool isReliable; |
| 49 Encoding encoding = CompactEncDet::DetectEncoding( | 50 Encoding encoding = CompactEncDet::DetectEncoding( |
| 50 data, length, hintUrl, nullptr, nullptr, | 51 data, length, hintUrl, nullptr, nullptr, |
| 51 EncodingNameAliasToEncoding(hintEncodingName), language, | 52 EncodingNameAliasToEncoding(hintEncodingName), language, |
| 52 CompactEncDet::WEB_CORPUS, | 53 CompactEncDet::WEB_CORPUS, |
| 53 false, // Include 7-bit encodings to detect ISO-2022-JP | 54 false, // Include 7-bit encodings to detect ISO-2022-JP |
| 54 &consumedBytes, &isReliable); | 55 &consumedBytes, &isReliable); |
| 55 | 56 |
| 56 // Should return false if the detected encoding is UTF8. This helps prevent | 57 // Should return false if the detected encoding is UTF8. This helps prevent |
| 57 // modern web sites from neglecting proper encoding labelling and simply | 58 // modern web sites from neglecting proper encoding labelling and simply |
| 58 // relying on browser-side encoding detection. Encoding detection is supposed | 59 // relying on browser-side encoding detection. Encoding detection is supposed |
| 59 // to work for web sites with legacy encoding only. Detection failure leads | 60 // to work for web sites with legacy encoding only (so the UTF-8 rejection |
| 60 // |TextResourceDecoder| to use its default encoding determined from system | 61 // does not have to be applied to local file resources). |
| 61 // locale or TLD. | 62 // Detection failure leads |TextResourceDecoder| to use its default encoding |
| 62 if (encoding == UNKNOWN_ENCODING || encoding == UTF8) | 63 // determined from system locale or TLD. |
| 64 String protocol = hintUrl ? KURL(ParsedURLString, hintUrl).protocol() : ""; |
| 65 if (encoding == UNKNOWN_ENCODING || (protocol != "file" && encoding == UTF8)) |
| 63 return false; | 66 return false; |
| 64 | 67 |
| 65 // Map all the Shift-JIS variants to Shift-JIS. | 68 // Map all the Shift-JIS variants to Shift-JIS. |
| 66 if (hintUserLanguage && !strncmp(hintUserLanguage, "ja", 2) && | 69 if (hintUserLanguage && !strncmp(hintUserLanguage, "ja", 2) && |
| 67 IsShiftJisOrVariant(encoding)) { | 70 IsShiftJisOrVariant(encoding)) { |
| 68 encoding = JAPANESE_SHIFT_JIS; | 71 encoding = JAPANESE_SHIFT_JIS; |
| 69 } | 72 } |
| 70 | 73 |
| 71 // 7-bit encodings (except ISO-2022-JP), and some obscure encodings not | 74 // 7-bit encodings (except ISO-2022-JP), and some obscure encodings not |
| 72 // supported in WHATWG encoding standard are marked as ASCII to keep the raw | 75 // supported in WHATWG encoding standard are marked as ASCII to keep the raw |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 106 encoding = ASCII_7BIT; | 109 encoding = ASCII_7BIT; |
| 107 break; | 110 break; |
| 108 default: | 111 default: |
| 109 break; | 112 break; |
| 110 } | 113 } |
| 111 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); | 114 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); |
| 112 return true; | 115 return true; |
| 113 } | 116 } |
| 114 | 117 |
| 115 } // namespace blink | 118 } // namespace blink |
| OLD | NEW |