Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import com.google.gwt.regexp.shared.RegExp; | 7 import com.google.gwt.regexp.shared.RegExp; |
| 8 | 8 |
| 9 public class StringUtilTest extends JsTestCase { | 9 public class StringUtilTest extends JsTestCase { |
| 10 public void testCountWords() { | 10 public void testCountWords() { |
| 11 assertEquals(0, StringUtil.countWords("")); | 11 assertEquals(0, StringUtil.countWords("")); |
| 12 assertEquals(0, StringUtil.countWords(" -@# ';]")); | 12 assertEquals(0, StringUtil.countWords(" -@# ';]")); |
| 13 assertEquals(1, StringUtil.countWords("word")); | 13 assertEquals(1, StringUtil.countWords("word")); |
| 14 assertEquals(1, StringUtil.countWords("b'fore")); | 14 assertEquals(1, StringUtil.countWords("b'fore")); |
| 15 assertEquals(1, StringUtil.countWords(" _word.under_score_ ")); | 15 assertEquals(1, StringUtil.countWords(" _word.under_score_ ")); |
| 16 assertEquals(2, StringUtil.countWords(" \ttwo\nwords")); | 16 assertEquals(2, StringUtil.countWords(" \ttwo\nwords")); |
| 17 assertEquals(2, StringUtil.countWords(" \ttwo @^@^&(@#$([][;;\nwords")); | 17 assertEquals(2, StringUtil.countWords(" \ttwo @^@^&(@#$([][;;\nwords")); |
| 18 assertEquals(5, StringUtil.countWords("dør når på svært dårlig")); | 18 assertEquals(5, StringUtil.countWords("dør når på svært dårlig")); |
| 19 assertEquals(5, StringUtil.countWords("svært få dør av blåbærsyltetøy")) ; | 19 assertEquals(5, StringUtil.countWords("svært få dør av blåbærsyltetøy")) ; |
| 20 | |
| 21 // One Chinese sentence, or a series of Japanese glyphs should not be treated | |
| 22 // as a single word. | |
| 23 assertTrue(StringUtil.countWords("一個中文句子不應該當成一個字") > 1); | |
| 24 assertTrue(StringUtil.countWords("ファイナルファンタジー") > 1); | |
| 25 // However, treating each Chinese/Japanese glyph as a word is also wrong. | |
| 26 assertTrue(StringUtil.countWords("一個中文句子不應該當成一個字") < 14); | |
| 27 assertTrue(StringUtil.countWords("ファイナルファンタジー") < 11); | |
|
kuan
2015/05/15 01:45:07
this is a katakana sentence, how about also adding
wychen
2015/05/18 18:49:20
All added.
kuan
2015/05/21 17:46:15
thanks!
| |
| 28 // Even if they are separated by spaces. | |
| 29 assertTrue(StringUtil.countWords("一 個 中 文 句 子 不 應 該 當 成 一 個 字") < 14); | |
| 30 assertTrue(StringUtil.countWords("フ ァ イ ナ ル フ ァ ン タ ジ ー") < 11); | |
| 31 | |
| 32 assertEquals(1, StringUtil.countWords("字")); | |
| 33 assertEquals(1, StringUtil.countWords("が")); | |
| 34 | |
| 35 // Mixing ASCII words and Chinese/Japanese glyphs | |
| 36 assertEquals(2, StringUtil.countWords("word字")); | |
| 37 assertEquals(2, StringUtil.countWords("word 字")); | |
| 38 | |
| 39 // Hangul uses space as word delimiter like English. | |
| 40 assertEquals(1, StringUtil.countWords("어")); | |
| 41 assertEquals(2, StringUtil.countWords("한국어 단어")); | |
| 42 assertEquals(5, StringUtil.countWords("한 국 어 단 어")); | |
| 43 assertEquals(8, StringUtil.countWords( | |
| 44 "예비군 훈련장 총기 난사범 최모씨의 군복에서 발견된 유서.")); | |
| 20 } | 45 } |
| 21 | 46 |
| 22 public void testIsWhitespace() { | 47 public void testIsWhitespace() { |
| 23 assertTrue(StringUtil.isWhitespace(' ')); | 48 assertTrue(StringUtil.isWhitespace(' ')); |
| 24 assertTrue(StringUtil.isWhitespace('\t')); | 49 assertTrue(StringUtil.isWhitespace('\t')); |
| 25 assertTrue(StringUtil.isWhitespace('\n')); | 50 assertTrue(StringUtil.isWhitespace('\n')); |
| 26 assertTrue(StringUtil.isWhitespace('\u00a0')); | 51 assertTrue(StringUtil.isWhitespace('\u00a0')); |
| 27 assertFalse(StringUtil.isWhitespace('a')); | 52 assertFalse(StringUtil.isWhitespace('a')); |
| 28 assertFalse(StringUtil.isWhitespace('$')); | 53 assertFalse(StringUtil.isWhitespace('$')); |
| 29 assertFalse(StringUtil.isWhitespace('_')); | 54 assertFalse(StringUtil.isWhitespace('_')); |
| (...skipping 77 matching lines...) | |
| 107 assertEquals(-1, StringUtil.toNumber("'8_")); | 132 assertEquals(-1, StringUtil.toNumber("'8_")); |
| 108 assertEquals(-1, StringUtil.toNumber("")); | 133 assertEquals(-1, StringUtil.toNumber("")); |
| 109 assertEquals(-1, StringUtil.toNumber(" ")); | 134 assertEquals(-1, StringUtil.toNumber(" ")); |
| 110 assertEquals(-1, StringUtil.toNumber("\u00a0\u0460")); | 135 assertEquals(-1, StringUtil.toNumber("\u00a0\u0460")); |
| 111 assertEquals(-1, StringUtil.toNumber("abc")); | 136 assertEquals(-1, StringUtil.toNumber("abc")); |
| 112 assertEquals(-1, StringUtil.toNumber("$")); | 137 assertEquals(-1, StringUtil.toNumber("$")); |
| 113 assertEquals(-1, StringUtil.toNumber("_")); | 138 assertEquals(-1, StringUtil.toNumber("_")); |
| 114 } | 139 } |
| 115 | 140 |
| 116 } | 141 } |
| OLD | NEW |