Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(80)

Unified Diff: components/translate/core/language_detection/chinese_script_classifier_test.cc

Issue 2756313002: [Merge M-58] Adds ChineseScriptClassifier to predict zh-Hant or zh-Hans for input detected as zh. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/translate/core/language_detection/chinese_script_classifier_test.cc
diff --git a/components/translate/core/language_detection/chinese_script_classifier_test.cc b/components/translate/core/language_detection/chinese_script_classifier_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d95b0c1ac61401fc9df2ce7b9808f8f99be0ab90
--- /dev/null
+++ b/components/translate/core/language_detection/chinese_script_classifier_test.cc
@@ -0,0 +1,72 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/translate/core/language_detection/chinese_script_classifier.h"
+
+#include <string>
+#include <utility>
+#include <vector>
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace translate {
+namespace {
+
+class ChineseScriptClassifierTest : public testing::Test {
+ protected:
+ ChineseScriptClassifier classifier_;  // Classifier under test; each TEST_F calls Classify() on it.
+};
+
+TEST_F(ChineseScriptClassifierTest, Simplified) {
+  // Simplified samples must classify as zh-Hans; a failure prints the sample.
+  const std::vector<std::string> zh_hans_strings = {
+      "正体字/繁体字", "台湾", "中国", "简化字", "经举发后仍不办理而行驶"};
+  for (const auto& sample : zh_hans_strings) {
+    EXPECT_EQ("zh-Hans", classifier_.Classify(sample)) << sample;
+  }
+}
+
+TEST_F(ChineseScriptClassifierTest, Traditional) {
+  // Traditional samples must classify as zh-Hant; a failure prints the sample.
+  const std::vector<std::string> zh_hant_strings = {
+      "正體字/繁體字", "臺灣", "美國", "簡化字", "經舉發後仍不辦理而行駛"};
+  for (const auto& sample : zh_hant_strings) {
+    EXPECT_EQ("zh-Hant", classifier_.Classify(sample)) << sample;
+  }
+}
+
+TEST_F(ChineseScriptClassifierTest, AmbiguousWithOnlyCharsValidForBothScripts) {
+  // Chinese text with no script-specific characters: zh-Hans is expected.
+  const std::vector<std::string> shared_script_samples = {
+      "我看到你", "你好", "我有很多工作要做"};
+  for (const std::string& sample : shared_script_samples) {
+    EXPECT_EQ("zh-Hans", classifier_.Classify(sample)) << sample;
+  }
+
+  // Non-Chinese input is outside the classifier's intended use; these cases
+  // only record that zh-Hans is still the value that comes back.
+  const std::vector<std::string> non_chinese_samples = {
+      "", " ", "This is English text."};
+  for (const std::string& sample : non_chinese_samples) {
+    EXPECT_EQ("zh-Hans", classifier_.Classify(sample)) << sample;
+  }
+}
+
+TEST_F(ChineseScriptClassifierTest,
+       AmbiguousWithMixedSimplifiedOnlyAndTraditionalOnly) {
+  // Mixed-script input. Each entry is {text, expected code}; in both samples
+  // the expected code is the script that supplies most of the characters.
+  const std::vector<std::pair<std::string, std::string>> ambiguous_zh_strings =
+      {
+          // 4 zh-Hant chars and 1 zh-Hans char.
+          {"國國國國国", "zh-Hant"},
+          // 1 zh-Hant char and 4 zh-Hans chars.
+          {"國国国国国", "zh-Hans"},
+      };
+  for (const auto& item : ambiguous_zh_strings) {
+    EXPECT_EQ(item.second, classifier_.Classify(item.first)) << item.first;
+  }
+}
+
+} // namespace
+} // namespace translate

Powered by Google App Engine
This is Rietveld 408576698