Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1142)

Side by Side Diff: third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp

Issue 2803563004: Avoid using language hint in encoding detection (Closed)
Patch Set: fix bug (Created 3 years, 8 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 25 matching lines...) Expand all
36 36
37 namespace blink { 37 namespace blink {
38 38
39 bool detectTextEncoding(const char* data, 39 bool detectTextEncoding(const char* data,
40 size_t length, 40 size_t length,
41 const char* hintEncodingName, 41 const char* hintEncodingName,
42 const KURL& hintUrl, 42 const KURL& hintUrl,
43 const char* hintUserLanguage, 43 const char* hintUserLanguage,
44 WTF::TextEncoding* detectedEncoding) { 44 WTF::TextEncoding* detectedEncoding) {
45 *detectedEncoding = WTF::TextEncoding(); 45 *detectedEncoding = WTF::TextEncoding();
46 Language language; 46 // In general, do not use language hint. This helps get more
47 LanguageFromCode(hintUserLanguage, &language); 47 // deterministic encoding detection results across devices. Note that local
48 // file resources can still benefit from the hint.
49 Language language = UNKNOWN_LANGUAGE;
50 if (hintUrl.protocol() == "file")
51 LanguageFromCode(hintUserLanguage, &language);
48 int consumedBytes; 52 int consumedBytes;
49 bool isReliable; 53 bool isReliable;
50 Encoding encoding = CompactEncDet::DetectEncoding( 54 Encoding encoding = CompactEncDet::DetectEncoding(
51 data, length, hintUrl.getString().ascii().data(), nullptr, nullptr, 55 data, length, hintUrl.getString().ascii().data(), nullptr, nullptr,
52 EncodingNameAliasToEncoding(hintEncodingName), language, 56 EncodingNameAliasToEncoding(hintEncodingName), language,
53 CompactEncDet::WEB_CORPUS, 57 CompactEncDet::WEB_CORPUS,
54 false, // Include 7-bit encodings to detect ISO-2022-JP 58 false, // Include 7-bit encodings to detect ISO-2022-JP
55 &consumedBytes, &isReliable); 59 &consumedBytes, &isReliable);
56 60
57 // Should return false if the detected encoding is UTF8. This helps prevent 61 // Should return false if the detected encoding is UTF8. This helps prevent
58 // modern web sites from neglecting proper encoding labelling and simply 62 // modern web sites from neglecting proper encoding labelling and simply
59 // relying on browser-side encoding detection. Encoding detection is supposed 63 // relying on browser-side encoding detection. Encoding detection is supposed
60 // to work for web sites with legacy encoding only (so this doesn't have to 64 // to work for web sites with legacy encoding only (so this doesn't have to
61 // be applied to local file resources). 65 // be applied to local file resources).
62 // Detection failure leads |TextResourceDecoder| to use its default encoding 66 // Detection failure leads |TextResourceDecoder| to use its default encoding
63 // determined from system locale or TLD. 67 // determined from system locale or TLD.
64 if (encoding == UNKNOWN_ENCODING || 68 if (encoding == UNKNOWN_ENCODING ||
65 (hintUrl.protocol() != "file" && encoding == UTF8)) 69 (hintUrl.protocol() != "file" && encoding == UTF8))
66 return false; 70 return false;
67 71
68 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); 72 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding));
69 return true; 73 return true;
70 } 74 }
71 75
72 } // namespace blink 76 } // namespace blink
OLD | NEW
« no previous file with comments | « no previous file | third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698