| OLD | NEW |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_CHINESE_SCRIPT_CLASSIFIER_H_ | 5 #ifndef COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_CHINESE_SCRIPT_CLASSIFIER_H_ |
| 6 #define COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_CHINESE_SCRIPT_CLASSIFIER_H_ | 6 #define COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_CHINESE_SCRIPT_CLASSIFIER_H_ |
| 7 | 7 |
| 8 #include <memory> | 8 #include <memory> |
| 9 #include <string> | 9 #include <string> |
| 10 #include "third_party/icu/source/i18n/unicode/translit.h" | 10 #include "third_party/icu/source/common/unicode/uniset.h" |
| 11 | 11 |
| 12 namespace translate { | 12 namespace translate { |
| 13 | 13 |
| 14 class ChineseScriptClassifier { | 14 class ChineseScriptClassifier { |
| 15 public: | 15 public: |
| 16 // Initializes both the Hant-to-Hans ICU transliterator and the | 16 // Initializes both the zh-Hans and zh-Hant UnicodeSets used for |
| 17 // Hans-to-Hant ICU transliterator. | 17 // lookup when Classify is called. |
| 18 ChineseScriptClassifier(); | 18 ChineseScriptClassifier(); |
| 19 ~ChineseScriptClassifier(); | 19 ~ChineseScriptClassifier(); |
| 20 | 20 |
| 21 // Given Chinese text as input, returns either zh-Hant or zh-Hans. | 21 // Given Chinese text as input, returns either zh-Hant or zh-Hans. |
| 22 // When the input is ambiguous, i.e. not completely zh-Hans and not | 22 // When the input is ambiguous, i.e. not completely zh-Hans and not |
| 23 // completely zh-Hant, this function returns the closest language code | 23 // completely zh-Hant, this function returns the closest language code |
| 24 // matching the input. | 24 // matching the input. |
| 25 // | 25 // |
| 26 // Behavior is undefined for non-Chinese input. | 26 // Behavior is undefined for non-Chinese input. |
| 27 std::string Classify(const std::string& input) const; | 27 std::string Classify(const std::string& input) const; |
| 28 | 28 |
| 29 // Returns true if the underlying transliterators were properly initialized | 29 // Returns true if the underlying UnicodeSets were properly initialized |
| 30 // by the constructor. | 30 // by the constructor. |
| 31 bool IsInitialized() const; | 31 bool IsInitialized() const; |
| 32 | 32 |
| 33 private: | 33 private: |
| 34 // ICU Transliterator that does Hans to Hant conversion. | 34 // Set of chars generally unique to zh-Hans. |
| 35 std::unique_ptr<icu::Transliterator> hans2hant_; | 35 std::unique_ptr<icu::UnicodeSet> hans_set_; |
| 36 | 36 |
| 37 // ICU Transliterator that does Hant to Hans conversion. | 37 // Set of chars generally unique to zh-Hant. |
| 38 std::unique_ptr<icu::Transliterator> hant2hans_; | 38 std::unique_ptr<icu::UnicodeSet> hant_set_; |
| 39 }; | 39 }; |
| 40 | 40 |
| 41 } // namespace translate | 41 } // namespace translate |
| 42 | 42 |
| 43 #endif // COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_CHINESE_SCRIPT_CLASSIFIER_H_ | 43 #endif // COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_CHINESE_SCRIPT_CLASSIFIER_H_ |
| OLD | NEW |