Chromium Code Reviews| Index: components/translate/core/language_detection/chinese_script_classifier.h |
| diff --git a/components/translate/core/language_detection/chinese_script_classifier.h b/components/translate/core/language_detection/chinese_script_classifier.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..c4243c32286c14a08f8eb932997ffcf3d583169f |
| --- /dev/null |
| +++ b/components/translate/core/language_detection/chinese_script_classifier.h |
| @@ -0,0 +1,49 @@ |
| +// Copyright 2017 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_CHINESE_SCRIPT_CLASSIFIER_H_ |
| +#define COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_CHINESE_SCRIPT_CLASSIFIER_H_ |
| + |
| +#include <memory> |
| +#include <string> |
| +#include "third_party/icu/source/i18n/unicode/translit.h" |
| + |
| +namespace translate { |
| + |
| +class ChineseScriptClassifier { |
| + public: |
| + // Sets up the two ICU transliterators (Hant-to-Hans and Hans-to-Hant). |
| + // Call IsInitialized() afterwards to check that the setup succeeded. |
| + ChineseScriptClassifier(); |
| + ~ChineseScriptClassifier(); |
| + |
| + // Classifies the given Chinese text and returns its language code, which |
| + // is either zh-Hant or zh-Hans. When the input is ambiguous, i.e. neither |
| + // completely zh-Hans nor completely zh-Hant, the language code that most |
| + // closely matches the input is returned. |
| + // |
| + // Behavior is undefined for non-Chinese input. |
| + std::string Classify(const std::string& input) const; |
| + |
| + // Returns true if the underlying transliterators were properly initialized |
| + // by the constructor, and false otherwise. |
| + bool IsInitialized() const; |

groby-ooo-7-16
2017/03/08 00:41:55
Maybe use a factory mode instead? (It's a personal
riesa
2017/03/08 01:47:42
My understanding is that the factory model is not
|
| + |
| + private: |
| + // BCP 47 language code for Chinese in Han Simplified script (zh-Hans). |
| + static const char kChineseSimplifiedCode[]; |

groby-ooo-7-16
2017/03/08 00:41:55
Why keep those as class members, as opposed to ano
riesa
2017/03/08 01:47:42
Just a style/organization preference. But in a mas
|
| + |
| + // BCP 47 language code for Chinese in Han Traditional script (zh-Hant). |
| + static const char kChineseTraditionalCode[]; |
| + |
| + // Owned ICU transliterator that converts Hans text to Hant. |
| + std::unique_ptr<icu::Transliterator> hans2hant_; |
| + |
| + // Owned ICU transliterator that converts Hant text to Hans. |
| + std::unique_ptr<icu::Transliterator> hant2hans_; |
| +}; |
| + |
| +} // namespace translate |
| + |
| +#endif // COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_CHINESE_SCRIPT_CLASSIFIER_H_ |