Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(158)

Side by Side Diff: third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp

Issue 2784483003: Respect UTF-8 detection result for local file resources (Closed)
Patch Set: Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 12 matching lines...) Expand all
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */ 29 */
30 30
31 #include "platform/text/TextEncodingDetector.h" 31 #include "platform/text/TextEncodingDetector.h"
32 32
33 #include "platform/weborigin/KURL.h"
33 #include "third_party/ced/src/compact_enc_det/compact_enc_det.h" 34 #include "third_party/ced/src/compact_enc_det/compact_enc_det.h"
34 #include "wtf/text/TextEncoding.h" 35 #include "wtf/text/TextEncoding.h"
35 36
36 namespace blink { 37 namespace blink {
37 38
38 bool detectTextEncoding(const char* data, 39 bool detectTextEncoding(const char* data,
39 size_t length, 40 size_t length,
40 const char* hintEncodingName, 41 const char* hintEncodingName,
41 const char* hintUrl, 42 const char* hintUrl,
tkent 2017/03/29 22:53:58 Let's make the |hintUrl| argument KURL later.
Jinsuk Kim 2017/03/29 23:14:36 Yes, that would help avoid recreating the KURL object mor… [comment truncated in collapsed view]
42 const char* hintUserLanguage, 43 const char* hintUserLanguage,
43 WTF::TextEncoding* detectedEncoding) { 44 WTF::TextEncoding* detectedEncoding) {
44 *detectedEncoding = WTF::TextEncoding(); 45 *detectedEncoding = WTF::TextEncoding();
45 Language language; 46 Language language;
46 LanguageFromCode(hintUserLanguage, &language); 47 LanguageFromCode(hintUserLanguage, &language);
47 int consumedBytes; 48 int consumedBytes;
48 bool isReliable; 49 bool isReliable;
49 Encoding encoding = CompactEncDet::DetectEncoding( 50 Encoding encoding = CompactEncDet::DetectEncoding(
50 data, length, hintUrl, nullptr, nullptr, 51 data, length, hintUrl, nullptr, nullptr,
51 EncodingNameAliasToEncoding(hintEncodingName), language, 52 EncodingNameAliasToEncoding(hintEncodingName), language,
52 CompactEncDet::WEB_CORPUS, 53 CompactEncDet::WEB_CORPUS,
53 false, // Include 7-bit encodings to detect ISO-2022-JP 54 false, // Include 7-bit encodings to detect ISO-2022-JP
54 &consumedBytes, &isReliable); 55 &consumedBytes, &isReliable);
55 56
56 // Should return false if the detected encoding is UTF8. This helps prevent 57 // Should return false if the detected encoding is UTF8. This helps prevent
57 // modern web sites from neglecting proper encoding labelling and simply 58 // modern web sites from neglecting proper encoding labelling and simply
58 // relying on browser-side encoding detection. Encoding detection is supposed 59 // relying on browser-side encoding detection. Encoding detection is supposed
59 // to work for web sites with legacy encoding only. Detection failure leads 60 // to work for web sites with legacy encoding only (so this doesn't have to
60 // |TextResourceDecoder| to use its default encoding determined from system 61 // be applied to local file resources).
61 // locale or TLD. 62 // Detection failure leads |TextResourceDecoder| to use its default encoding
62 if (encoding == UNKNOWN_ENCODING || encoding == UTF8) 63 // determined from system locale or TLD.
64 String protocol = hintUrl ? KURL(ParsedURLString, hintUrl).protocol() : "";
65 if (encoding == UNKNOWN_ENCODING || (protocol != "file" && encoding == UTF8))
63 return false; 66 return false;
64 67
65 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); 68 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding));
66 return true; 69 return true;
67 } 70 }
68 71
69 } // namespace blink 72 } // namespace blink
OLD | NEW
« no previous file with comments | « no previous file | third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698