OLD | NEW |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/i18n/encoding_detection.h" | 5 #include "base/i18n/encoding_detection.h" |
6 | 6 |
7 #include "third_party/ced/src/compact_enc_det/compact_enc_det.h" | 7 #include "third_party/ced/src/compact_enc_det/compact_enc_det.h" |
8 | 8 |
9 namespace base { | 9 namespace base { |
10 | 10 |
11 bool DetectEncoding(const std::string& text, std::string* encoding) { | 11 bool DetectEncoding(const std::string& text, std::string* encoding) { |
12 int consumed_bytes; | 12 int consumed_bytes; |
13 bool is_reliable; | 13 bool is_reliable; |
14 Encoding enc = CompactEncDet::DetectEncoding( | 14 Encoding enc = CompactEncDet::DetectEncoding( |
15 text.c_str(), text.length(), nullptr, nullptr, nullptr, | 15 text.c_str(), text.length(), nullptr, nullptr, nullptr, |
16 UNKNOWN_ENCODING, | 16 UNKNOWN_ENCODING, |
17 UNKNOWN_LANGUAGE, | 17 UNKNOWN_LANGUAGE, |
18 CompactEncDet::QUERY_CORPUS, // plain text | 18 CompactEncDet::QUERY_CORPUS, // plain text |
19 false, // Include 7-bit encodings | 19 false, // Include 7-bit encodings |
20 &consumed_bytes, | 20 &consumed_bytes, |
21 &is_reliable); | 21 &is_reliable); |
22 | 22 |
23 if (enc == UNKNOWN_ENCODING) | 23 if (enc == UNKNOWN_ENCODING) |
24 return false; | 24 return false; |
25 | 25 |
26 // 7-bit encodings (except ISO-2022-JP) are not supported in web standard. | |
27 // Mark them as ascii to keep the raw bytes intact. | |
28 switch (enc) { | |
29 case HZ_GB_2312: | |
30 case ISO_2022_KR: | |
31 case ISO_2022_CN: | |
32 case UTF7: | |
33 enc = ASCII_7BIT; | |
34 break; | |
35 default: | |
36 break; | |
37 } | |
38 *encoding = MimeEncodingName(enc); | 26 *encoding = MimeEncodingName(enc); |
39 return true; | 27 return true; |
40 } | 28 } |
41 } // namespace base | 29 } // namespace base |
OLD | NEW |