Chromium Code Reviews| Index: base/i18n/encoding_detection.cc |
| diff --git a/base/i18n/encoding_detection.cc b/base/i18n/encoding_detection.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..a3e3be828e2fac8ebc49cd4d8677d1291420f99b |
| --- /dev/null |
| +++ b/base/i18n/encoding_detection.cc |
| @@ -0,0 +1,41 @@ |
| +// Copyright (c) 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "base/i18n/encoding_detection.h" |
| + |
| +#include "third_party/ced/src/compact_enc_det/compact_enc_det.h" |
| + |
| +namespace base { |
| + |
| +// Guesses the character encoding of `text` with CompactEncDet. |
| +// |
| +// On success, stores the name returned by MimeEncodingName() for the detected |
| +// encoding in `*encoding` and returns true. Returns false, without writing to |
| +// `*encoding`, when the detector reports UNKNOWN_ENCODING. |
| +bool DetectEncoding(const std::string& text, std::string* encoding) { |
| +  // The detector reports these through out-parameters; this function does not |
| +  // use them. They are initialized so they never hold indeterminate values. |
| +  int consumed_bytes = 0; |
|
Lei Zhang
2016/08/08 22:09:55
nit: foo_bar, not fooBar.
Jinsuk Kim
2016/08/09 01:16:19
Done.
|
| +  bool is_reliable = false; |
| +  // No URL, HTTP charset, or meta charset hints are supplied (the three |
| +  // nullptr arguments), and no encoding or language hint is given. |
| +  Encoding enc = CompactEncDet::DetectEncoding( |
| +      text.c_str(), text.length(), nullptr, nullptr, nullptr, |
| +      UNKNOWN_ENCODING, |
| +      UNKNOWN_LANGUAGE, |
| +      CompactEncDet::QUERY_CORPUS,  // plain text |
|
Lei Zhang
2016/08/08 22:09:55
two spaces in front of comments.
Jinsuk Kim
2016/08/09 01:16:19
Done.
|
| +      false,  // Include 7-bit encodings |
| +      &consumed_bytes, |
| +      &is_reliable); |
| + |
| +  if (enc == UNKNOWN_ENCODING) |
| +    return false; |
| + |
| +  // 7-bit encodings (except ISO-2022-JP) are not supported by web standards. |
| +  // Report them as ASCII so that the raw bytes are kept intact. |
| +  switch (enc) { |
| +    case HZ_GB_2312: |
| +    case ISO_2022_KR: |
| +    case ISO_2022_CN: |
| +    case UTF7: |
| +      enc = ASCII_7BIT; |
| +      break; |
| +    default: |
| +      break; |
| +  } |
| +  *encoding = MimeEncodingName(enc); |
| +  return true; |
| +} |
| +} // namespace base |