OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.regexp.shared.RegExp; | 7 import com.google.gwt.regexp.shared.RegExp; |
8 | 8 |
9 public class StringUtilTest extends JsTestCase { | 9 public class StringUtilTest extends JsTestCase { |
10 public void testCountWords() { | 10 public void testCountWords() { |
11 assertEquals(0, StringUtil.countWords("")); | 11 assertEquals(0, StringUtil.countWords("")); |
12 assertEquals(0, StringUtil.countWords(" -@# ';]")); | 12 assertEquals(0, StringUtil.countWords(" -@# ';]")); |
13 assertEquals(1, StringUtil.countWords("word")); | 13 assertEquals(1, StringUtil.countWords("word")); |
14 assertEquals(1, StringUtil.countWords("b'fore")); | 14 assertEquals(1, StringUtil.countWords("b'fore")); |
15 assertEquals(1, StringUtil.countWords(" _word.under_score_ ")); | 15 assertEquals(1, StringUtil.countWords(" _word.under_score_ ")); |
16 assertEquals(2, StringUtil.countWords(" \ttwo\nwords")); | 16 assertEquals(2, StringUtil.countWords(" \ttwo\nwords")); |
17 assertEquals(2, StringUtil.countWords(" \ttwo @^@^&(@#$([][;;\nwords")); | 17 assertEquals(2, StringUtil.countWords(" \ttwo @^@^&(@#$([][;;\nwords")); |
18 assertEquals(5, StringUtil.countWords("dør når på svært dårlig")); | 18 assertEquals(5, StringUtil.countWords("dør når på svært dårlig")); |
19 assertEquals(5, StringUtil.countWords("svært få dør av blåbærsyltetøy")) ; | 19 assertEquals(5, StringUtil.countWords("svært få dør av blåbærsyltetøy")) ; |
20 | |
21 // One Chinese sentence, or a series of Japanese glyphs should not be treated | |
22 // as a single word. | |
23 assertTrue(StringUtil.countWords("一個中文句子不應該當成一個字") > 1); // zh-Hant | |
24 assertTrue(StringUtil.countWords("中国和马来西亚使用简体字") > 1); // zh-Hans | |
25 assertTrue(StringUtil.countWords("ファイナルファンタジー") > 1); // Katakana | |
26 assertTrue(StringUtil.countWords("いってらっしゃい") > 1); // Hiragana | |
27 assertTrue(StringUtil.countWords("仏仮駅辺") > 1); // Kanji | |
28 // However, treating each Chinese/Japanese glyph as a word is also wrong. | |
29 assertTrue(StringUtil.countWords("一個中文句子不應該當成一個字") < 14); | |
30 assertTrue(StringUtil.countWords("中国和马来西亚使用简体字") < 12); | |
31 assertTrue(StringUtil.countWords("ファイナルファンタジー") < 11); | |
32 assertTrue(StringUtil.countWords("いってらっしゃい") < 8); | |
33 assertTrue(StringUtil.countWords("仏仮駅辺") < 4); | |
34 // Even if they are separated by spaces. | |
35 assertTrue(StringUtil.countWords("一 個 中 文 句 子 不 應 該 當 成 一 個 字") < 14); | |
36 assertTrue(StringUtil.countWords("中 国 和 马 来 西 亚 使 用 简 体 字") < 12); | |
37 assertTrue(StringUtil.countWords("フ ァ イ ナ ル フ ァ ン タ ジ ー") < 11); | |
38 assertTrue(StringUtil.countWords("い っ て ら っ し ゃ い") < 8); | |
39 assertTrue(StringUtil.countWords("仏 仮 駅 辺") < 4); | |
40 | |
41 assertEquals(1, StringUtil.countWords("字")); | |
42 assertEquals(1, StringUtil.countWords("が")); | |
43 | |
44 // Mixing ASCII words and Chinese/Japanese glyphs | |
45 assertEquals(2, StringUtil.countWords("word字")); | |
46 assertEquals(2, StringUtil.countWords("word 字")); | |
47 | |
48 // Hangul uses space as word delimiter like English. | |
49 assertEquals(1, StringUtil.countWords("어")); | |
50 assertEquals(2, StringUtil.countWords("한국어 단어")); | |
51 assertEquals(5, StringUtil.countWords("한 국 어 단 어")); | |
52 assertEquals(8, StringUtil.countWords( | |
53 "예비군 훈련장 총기 난사범 최모씨의 군복에서 발견된 유서.")); | |
54 } | |
55 | |
56 public void testCountWordsFast() { | |
57 StringUtil.selectCountWordsFunc(""); | |
cjhopman
2015/05/29 19:38:57
This should probably use the inner classes directly...
wychen
2015/05/31 09:04:03
Done.
| |
58 assertEquals(0, StringUtil.countWords("어")); | |
59 StringUtil.selectCountWordsFunc("어"); | |
60 assertEquals(1, StringUtil.countWords("어")); | |
61 assertEquals(0, StringUtil.countWords("字")); | |
62 StringUtil.selectCountWordsFunc("字"); | |
63 assertEquals(1, StringUtil.countWords("字")); | |
20 } | 64 } |
21 | 65 |
22 public void testIsWhitespace() { | 66 public void testIsWhitespace() { |
23 assertTrue(StringUtil.isWhitespace(' ')); | 67 assertTrue(StringUtil.isWhitespace(' ')); |
24 assertTrue(StringUtil.isWhitespace('\t')); | 68 assertTrue(StringUtil.isWhitespace('\t')); |
25 assertTrue(StringUtil.isWhitespace('\n')); | 69 assertTrue(StringUtil.isWhitespace('\n')); |
26 assertTrue(StringUtil.isWhitespace('\u00a0')); | 70 assertTrue(StringUtil.isWhitespace('\u00a0')); |
27 assertFalse(StringUtil.isWhitespace('a')); | 71 assertFalse(StringUtil.isWhitespace('a')); |
28 assertFalse(StringUtil.isWhitespace('$')); | 72 assertFalse(StringUtil.isWhitespace('$')); |
29 assertFalse(StringUtil.isWhitespace('_')); | 73 assertFalse(StringUtil.isWhitespace('_')); |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
107 assertEquals(-1, StringUtil.toNumber("'8_")); | 151 assertEquals(-1, StringUtil.toNumber("'8_")); |
108 assertEquals(-1, StringUtil.toNumber("")); | 152 assertEquals(-1, StringUtil.toNumber("")); |
109 assertEquals(-1, StringUtil.toNumber(" ")); | 153 assertEquals(-1, StringUtil.toNumber(" ")); |
110 assertEquals(-1, StringUtil.toNumber("\u00a0\u0460")); | 154 assertEquals(-1, StringUtil.toNumber("\u00a0\u0460")); |
111 assertEquals(-1, StringUtil.toNumber("abc")); | 155 assertEquals(-1, StringUtil.toNumber("abc")); |
112 assertEquals(-1, StringUtil.toNumber("$")); | 156 assertEquals(-1, StringUtil.toNumber("$")); |
113 assertEquals(-1, StringUtil.toNumber("_")); | 157 assertEquals(-1, StringUtil.toNumber("_")); |
114 } | 158 } |
115 | 159 |
116 } | 160 } |
OLD | NEW |