OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.regexp.shared.RegExp; | 7 import com.google.gwt.regexp.shared.RegExp; |
| 8 import java.util.ArrayList; |
| 9 import java.util.List; |
8 | 10 |
9 public class StringUtilTest extends JsTestCase { | 11 public class StringUtilTest extends JsTestCase { |
| 12 public void testFastWordCounter() { |
| 13 List<StringUtil.WordCounter> counters = new ArrayList(); |
| 14 counters.add(new StringUtil.FastWordCounter()); |
| 15 counters.add(new StringUtil.LetterWordCounter()); |
| 16 counters.add(new StringUtil.FullWordCounter()); |
| 17 |
| 18 for(StringUtil.WordCounter counter: counters) { |
| 19 assertEquals(0, counter.count("")); |
| 20 assertEquals(0, counter.count(" -@# ';]")); |
| 21 assertEquals(1, counter.count("word")); |
| 22 assertEquals(1, counter.count("b'fore")); |
| 23 assertEquals(1, counter.count(" _word.under_score_ ")); |
| 24 assertEquals(2, counter.count(" \ttwo\nwords")); |
| 25 assertEquals(2, counter.count(" \ttwo @^@^&(@#$([][;;\nwords")); |
| 26 // Norwegian |
| 27 assertEquals(5, counter.count("dør når på svært dårlig")); |
| 28 assertEquals(5, counter.count("svært få dør av blåbærsyltetøy")); |
| 29 // Greek |
| 30 assertEquals(11, counter.count( |
| 31 "Παρέμβαση των ΗΠΑ για τα τεχνητά νησιά που κατασκευάζει η Κ
ίνα")); |
| 32 // Arabic |
| 33 assertEquals(6, counter.count("زلزال بقوة 8.5 درجات يضرب اليابان")); |
| 34 // Tibetan |
| 35 assertEquals(1, counter.count("༧གོང་ས་མཆོག་གི་ནང་གི་ངོ་སྤྲོད་ཀྱི་གསུ
ང་ཆོས་ལེགས་གྲུབ།")); |
| 36 // Thai |
| 37 assertEquals(3, counter.count("โซลาร์ อิมพัลส์ทู เหินฟ้าข้ามมหาสมุทร
")); |
| 38 } |
| 39 } |
| 40 |
| 41 public void testLetterWordCounter() { |
| 42 List<StringUtil.WordCounter> counters = new ArrayList(); |
| 43 counters.add(new StringUtil.LetterWordCounter()); |
| 44 counters.add(new StringUtil.FullWordCounter()); |
| 45 |
| 46 for(StringUtil.WordCounter counter: counters) { |
| 47 // Hangul uses space as word delimiter like English. |
| 48 assertEquals(1, counter.count("어")); |
| 49 assertEquals(2, counter.count("한국어 단어")); |
| 50 assertEquals(5, counter.count("한 국 어 단 어")); |
| 51 assertEquals(8, counter.count( |
| 52 "예비군 훈련장 총기 난사범 최모씨의 군복에서 발견된 유서.")); |
| 53 } |
| 54 } |
| 55 |
| 56 public void testFullWordCounter() { |
| 57 StringUtil.WordCounter counter = new StringUtil.FullWordCounter(); |
| 58 // One Chinese sentence, or a series of Japanese glyphs should not be tr
eated |
| 59 // as a single word. |
| 60 assertTrue(counter.count("一個中文句子不應該當成一個字") > 1); // zh-Hant |
| 61 assertTrue(counter.count("中国和马来西亚使用简体字") > 1); // zh-Hans |
| 62 assertTrue(counter.count("ファイナルファンタジー") > 1); // Katakana |
| 63 assertTrue(counter.count("いってらっしゃい") > 1); // Hiragana |
| 64 assertTrue(counter.count("仏仮駅辺") > 1); // Kanji |
| 65 // However, treating each Chinese/Japanese glyph as a word is also wrong
. |
| 66 assertTrue(counter.count("一個中文句子不應該當成一個字") < 14); |
| 67 assertTrue(counter.count("中国和马来西亚使用简体字") < 12); |
| 68 assertTrue(counter.count("ファイナルファンタジー") < 11); |
| 69 assertTrue(counter.count("いってらっしゃい") < 8); |
| 70 assertTrue(counter.count("仏仮駅辺") < 4); |
| 71 // Even if they are separated by spaces. |
| 72 assertTrue(counter.count("一 個 中 文 句 子 不 應 該 當 成 一 個 字") < 14); |
| 73 assertTrue(counter.count("中 国 和 马 来 西 亚 使 用 简 体 字") < 12); |
| 74 assertTrue(counter.count("フ ァ イ ナ ル フ ァ ン タ ジ ー") < 11); |
| 75 assertTrue(counter.count("い っ て ら っ し ゃ い") < 8); |
| 76 assertTrue(counter.count("仏 仮 駅 辺") < 4); |
| 77 |
| 78 assertEquals(1, counter.count("字")); |
| 79 assertEquals(1, counter.count("が")); |
| 80 |
| 81 // Mixing ASCII words and Chinese/Japanese glyphs |
| 82 assertEquals(2, counter.count("word字")); |
| 83 assertEquals(2, counter.count("word 字")); |
| 84 } |
| 85 |
| 86 public void testSelectWordCounter() { |
| 87 StringUtil.WordCounter counter; |
| 88 |
| 89 counter = StringUtil.selectWordCounter("abc"); |
| 90 assertTrue(counter instanceof StringUtil.FastWordCounter); |
| 91 |
| 92 counter = StringUtil.selectWordCounter("어"); |
| 93 assertTrue(counter instanceof StringUtil.LetterWordCounter); |
| 94 |
| 95 counter = StringUtil.selectWordCounter("字"); |
| 96 assertTrue(counter instanceof StringUtil.FullWordCounter); |
| 97 } |
| 98 |
10 public void testCountWords() { | 99 public void testCountWords() { |
11 assertEquals(0, StringUtil.countWords("")); | 100 StringUtil.setWordCounter(""); |
12 assertEquals(0, StringUtil.countWords(" -@# ';]")); | 101 assertEquals(2, StringUtil.countWords("two words")); |
13 assertEquals(1, StringUtil.countWords("word")); | 102 assertEquals(0, StringUtil.countWords("어")); |
14 assertEquals(1, StringUtil.countWords("b'fore")); | 103 StringUtil.setWordCounter("어"); |
15 assertEquals(1, StringUtil.countWords(" _word.under_score_ ")); | 104 assertEquals(1, StringUtil.countWords("어")); |
16 assertEquals(2, StringUtil.countWords(" \ttwo\nwords")); | 105 assertEquals(0, StringUtil.countWords("字")); |
17 assertEquals(2, StringUtil.countWords(" \ttwo @^@^&(@#$([][;;\nwords")); | 106 StringUtil.setWordCounter("字"); |
18 assertEquals(5, StringUtil.countWords("dør når på svært dårlig")); | 107 assertEquals(1, StringUtil.countWords("字")); |
19 assertEquals(5, StringUtil.countWords("svært få dør av blåbærsyltetøy"))
; | 108 // Make sure the internal WordCounter is restored to FullWordCounter in
the end. |
20 } | 109 } |
21 | 110 |
22 public void testIsWhitespace() { | 111 public void testIsWhitespace() { |
23 assertTrue(StringUtil.isWhitespace(' ')); | 112 assertTrue(StringUtil.isWhitespace(' ')); |
24 assertTrue(StringUtil.isWhitespace('\t')); | 113 assertTrue(StringUtil.isWhitespace('\t')); |
25 assertTrue(StringUtil.isWhitespace('\n')); | 114 assertTrue(StringUtil.isWhitespace('\n')); |
26 assertTrue(StringUtil.isWhitespace('\u00a0')); | 115 assertTrue(StringUtil.isWhitespace('\u00a0')); |
27 assertFalse(StringUtil.isWhitespace('a')); | 116 assertFalse(StringUtil.isWhitespace('a')); |
28 assertFalse(StringUtil.isWhitespace('$')); | 117 assertFalse(StringUtil.isWhitespace('$')); |
29 assertFalse(StringUtil.isWhitespace('_')); | 118 assertFalse(StringUtil.isWhitespace('_')); |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
107 assertEquals(-1, StringUtil.toNumber("'8_")); | 196 assertEquals(-1, StringUtil.toNumber("'8_")); |
108 assertEquals(-1, StringUtil.toNumber("")); | 197 assertEquals(-1, StringUtil.toNumber("")); |
109 assertEquals(-1, StringUtil.toNumber(" ")); | 198 assertEquals(-1, StringUtil.toNumber(" ")); |
110 assertEquals(-1, StringUtil.toNumber("\u00a0\u0460")); | 199 assertEquals(-1, StringUtil.toNumber("\u00a0\u0460")); |
111 assertEquals(-1, StringUtil.toNumber("abc")); | 200 assertEquals(-1, StringUtil.toNumber("abc")); |
112 assertEquals(-1, StringUtil.toNumber("$")); | 201 assertEquals(-1, StringUtil.toNumber("$")); |
113 assertEquals(-1, StringUtil.toNumber("_")); | 202 assertEquals(-1, StringUtil.toNumber("_")); |
114 } | 203 } |
115 | 204 |
116 } | 205 } |
OLD | NEW |