Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(228)

Unified Diff: javatests/org/chromium/distiller/StringUtilTest.java

Issue 1131853006: Fix word count issue for Chinese and Japanese (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: reorder tests Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: javatests/org/chromium/distiller/StringUtilTest.java
diff --git a/javatests/org/chromium/distiller/StringUtilTest.java b/javatests/org/chromium/distiller/StringUtilTest.java
index 620fbbcc61e8d4c379aa17c7a1c95c819e6e4dff..84b8227c726ce5d3047d85f067f2b37d34f0bf06 100644
--- a/javatests/org/chromium/distiller/StringUtilTest.java
+++ b/javatests/org/chromium/distiller/StringUtilTest.java
@@ -17,6 +17,31 @@ public class StringUtilTest extends JsTestCase {
assertEquals(2, StringUtil.countWords(" \ttwo @^@^&(@#$([][;;\nwords"));
assertEquals(5, StringUtil.countWords("dør når på svært dårlig"));
assertEquals(5, StringUtil.countWords("svært få dør av blåbærsyltetøy"));
+
+ // A Chinese sentence or a series of Japanese glyphs should not be treated
+ // as a single word.
+ assertTrue(StringUtil.countWords("一個中文句子不應該當成一個字") > 1);
+ assertTrue(StringUtil.countWords("ファイナルファンタジー") > 1);
+ // However, treating each Chinese/Japanese glyph as a word is also wrong.
+ assertTrue(StringUtil.countWords("一個中文句子不應該當成一個字") < 14);
+ assertTrue(StringUtil.countWords("ファイナルファンタジー") < 11);
kuan 2015/05/15 01:45:07 this is a katakana sentence, how about also adding
wychen 2015/05/18 18:49:20 All added.
kuan 2015/05/21 17:46:15 thanks!
+ // Space-separated glyphs should not each be counted as a word either.
+ assertTrue(StringUtil.countWords("一 個 中 文 句 子 不 應 該 當 成 一 個 字") < 14);
+ assertTrue(StringUtil.countWords("フ ァ イ ナ ル フ ァ ン タ ジ ー") < 11);
+
+ assertEquals(1, StringUtil.countWords("字"));
+ assertEquals(1, StringUtil.countWords("が"));
+
+ // Mixing ASCII words and Chinese/Japanese glyphs.
+ assertEquals(2, StringUtil.countWords("word字"));
+ assertEquals(2, StringUtil.countWords("word 字"));
+
+ // Hangul uses spaces as word delimiters, like English.
+ assertEquals(1, StringUtil.countWords("어"));
+ assertEquals(2, StringUtil.countWords("한국어 단어"));
+ assertEquals(5, StringUtil.countWords("한 국 어 단 어"));
+ assertEquals(8, StringUtil.countWords(
+ "예비군 훈련장 총기 난사범 최모씨의 군복에서 발견된 유서."));
}
public void testIsWhitespace() {
« java/org/chromium/distiller/StringUtil.java ('K') | « java/org/chromium/distiller/StringUtil.java ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698