| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. | 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 25 matching lines...) |
| 36 | 36 |
| 37 namespace blink { | 37 namespace blink { |
| 38 | 38 |
| 39 bool detectTextEncoding(const char* data, | 39 bool detectTextEncoding(const char* data, |
| 40 size_t length, | 40 size_t length, |
| 41 const char* hintEncodingName, | 41 const char* hintEncodingName, |
| 42 const KURL& hintUrl, | 42 const KURL& hintUrl, |
| 43 const char* hintUserLanguage, | 43 const char* hintUserLanguage, |
| 44 WTF::TextEncoding* detectedEncoding) { | 44 WTF::TextEncoding* detectedEncoding) { |
| 45 *detectedEncoding = WTF::TextEncoding(); | 45 *detectedEncoding = WTF::TextEncoding(); |
| 46 Language language; | 46 // In general, do not use language hint. This helps get more |
| 47 LanguageFromCode(hintUserLanguage, &language); | 47 // deterministic encoding detection results across devices. Note that local |
| 48 // file resources can still benefit from the hint. |
| 49 Language language = UNKNOWN_LANGUAGE; |
| 50 if (hintUrl.protocol() == "file") |
| 51 LanguageFromCode(hintUserLanguage, &language); |
| 48 int consumedBytes; | 52 int consumedBytes; |
| 49 bool isReliable; | 53 bool isReliable; |
| 50 Encoding encoding = CompactEncDet::DetectEncoding( | 54 Encoding encoding = CompactEncDet::DetectEncoding( |
| 51 data, length, hintUrl.getString().ascii().data(), nullptr, nullptr, | 55 data, length, hintUrl.getString().ascii().data(), nullptr, nullptr, |
| 52 EncodingNameAliasToEncoding(hintEncodingName), language, | 56 EncodingNameAliasToEncoding(hintEncodingName), language, |
| 53 CompactEncDet::WEB_CORPUS, | 57 CompactEncDet::WEB_CORPUS, |
| 54 false, // Include 7-bit encodings to detect ISO-2022-JP | 58 false, // Include 7-bit encodings to detect ISO-2022-JP |
| 55 &consumedBytes, &isReliable); | 59 &consumedBytes, &isReliable); |
| 56 | 60 |
| 57 // Should return false if the detected encoding is UTF8. This helps prevent | 61 // Should return false if the detected encoding is UTF8. This helps prevent |
| 58 // modern web sites from neglecting proper encoding labelling and simply | 62 // modern web sites from neglecting proper encoding labelling and simply |
| 59 // relying on browser-side encoding detection. Encoding detection is supposed | 63 // relying on browser-side encoding detection. Encoding detection is supposed |
| 60 // to work for web sites with legacy encoding only (so this doesn't have to | 64 // to work for web sites with legacy encoding only (so this doesn't have to |
| 61 // be applied to local file resources). | 65 // be applied to local file resources). |
| 62 // Detection failure leads |TextResourceDecoder| to use its default encoding | 66 // Detection failure leads |TextResourceDecoder| to use its default encoding |
| 63 // determined from system locale or TLD. | 67 // determined from system locale or TLD. |
| 64 if (encoding == UNKNOWN_ENCODING || | 68 if (encoding == UNKNOWN_ENCODING || |
| 65 (hintUrl.protocol() != "file" && encoding == UTF8)) | 69 (hintUrl.protocol() != "file" && encoding == UTF8)) |
| 66 return false; | 70 return false; |
| 67 | 71 |
| 68 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); | 72 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); |
| 69 return true; | 73 return true; |
| 70 } | 74 } |
| 71 | 75 |
| 72 } // namespace blink | 76 } // namespace blink |
| OLD | NEW |