components/translate/core/language_detection/chinese_script_classifier_test.cc - Issue 2756313002: [Merge M-58] Adds ChineseScriptClassifier to predict zh-Hant or zh-Hans for input detected as zh.

Unified Diff: components/translate/core/language_detection/chinese_script_classifier_test.cc

Issue 2756313002: [Merge M-58] Adds ChineseScriptClassifier to predict zh-Hant or zh-Hans for input detected as zh. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « components/translate/core/language_detection/chinese_script_classifier.cc ('k') | components/translate/core/language_detection/language_detection_util.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: components/translate/core/language_detection/chinese_script_classifier_test.cc

diff --git a/components/translate/core/language_detection/chinese_script_classifier_test.cc b/components/translate/core/language_detection/chinese_script_classifier_test.cc

new file mode 100644

index 0000000000000000000000000000000000000000..d95b0c1ac61401fc9df2ce7b9808f8f99be0ab90

--- /dev/null

+++ b/components/translate/core/language_detection/chinese_script_classifier_test.cc

@@ -0,0 +1,72 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "components/translate/core/language_detection/chinese_script_classifier.h"

+#include <string>

+#include <utility>

+#include <vector>

+#include "testing/gtest/include/gtest/gtest.h"

+namespace translate {

+namespace {

+class ChineseScriptClassifierTest : public testing::Test {  // Test fixture used by the TEST_F cases in this file.

+ protected:

+ ChineseScriptClassifier classifier_;  // Default-constructed classifier; the tests call Classify() on it.

+};

+// Checks that inputs written in Simplified Chinese are classified as zh-Hans.
+TEST_F(ChineseScriptClassifierTest, Simplified) {
+  // Every string below is expected to yield "zh-Hans".
+  const std::vector<std::string> zh_hans_strings = {
+      "正体字/繁体字", "台湾", "中国", "简化字", "经举发后仍不办理而行驶"};
+  for (const auto& zh_hans_string : zh_hans_strings) {
+    // Stream the quoted input so a failure report names the offending string.
+    EXPECT_EQ("zh-Hans", classifier_.Classify(zh_hans_string))
+        << "input: \"" << zh_hans_string << "\"";
+  }
+}

+// Checks that inputs written in Traditional Chinese are classified as zh-Hant.
+TEST_F(ChineseScriptClassifierTest, Traditional) {
+  // Every string below is expected to yield "zh-Hant".
+  const std::vector<std::string> zh_hant_strings = {
+      "正體字/繁體字", "臺灣", "美國", "簡化字", "經舉發後仍不辦理而行駛"};
+  for (const auto& zh_hant_string : zh_hant_strings) {
+    // Stream the quoted input so a failure report names the offending string.
+    EXPECT_EQ("zh-Hant", classifier_.Classify(zh_hant_string))
+        << "input: \"" << zh_hant_string << "\"";
+  }
+}

+// Checks the result for inputs that carry no script-specific characters:
+// Chinese text valid in both scripts (per the test name) and non-Chinese text.
+// Both groups are expected to yield "zh-Hans".
+TEST_F(ChineseScriptClassifierTest, AmbiguousWithOnlyCharsValidForBothScripts) {
+  // ChineseScriptClassifier returns zh-Hans in this case.
+  const std::vector<std::string> zh_strings = {"我看到你", "你好",
+                                               "我有很多工作要做"};
+  for (const auto& zh_string : zh_strings) {
+    EXPECT_EQ("zh-Hans", classifier_.Classify(zh_string))
+        << "input: \"" << zh_string << "\"";
+  }
+
+  // ChineseScriptClassifier should not be used for non-Chinese text, but will
+  // return zh-Hans in this case.
+  const std::vector<std::string> non_zh_strings = {"", " ",
+                                                   "This is English text."};
+  for (const auto& non_zh_string : non_zh_strings) {
+    // Quote the input: the empty and single-space cases would otherwise be
+    // invisible in the failure output.
+    EXPECT_EQ("zh-Hans", classifier_.Classify(non_zh_string))
+        << "input: \"" << non_zh_string << "\"";
+  }
+}

+// Checks inputs that mix Traditional-only and Simplified-only characters.
+TEST_F(ChineseScriptClassifierTest,
+       AmbiguousWithMixedSimplifiedOnlyAndTraditionalOnly) {
+  // Each entry is {input, expected script code}. In both cases listed here
+  // the expected code is the script whose characters are in the majority.
+  const std::vector<std::pair<std::string, std::string>> ambiguous_zh_strings =
+      {
+          // 4 zh-Hant chars and 1 zh-Hans char.
+          {"國國國國国", "zh-Hant"},
+          // 1 zh-Hant char and 4 zh-Hans chars.
+          {"國国国国国", "zh-Hans"},
+      };
+  for (const auto& ambiguous_item : ambiguous_zh_strings) {
+    // Stream the quoted input so a failure report names the offending string.
+    EXPECT_EQ(ambiguous_item.second,
+              classifier_.Classify(ambiguous_item.first))
+        << "input: \"" << ambiguous_item.first << "\"";
+  }
+}

+} // namespace

+} // namespace translate