OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.core.client.JavaScriptObject; | |
7 import com.google.gwt.regexp.shared.RegExp; | 8 import com.google.gwt.regexp.shared.RegExp; |
8 | 9 |
9 public class StringUtil { | 10 public class StringUtil { |
10 // For the whitespace-related functions below, Java's and Javascript's versions of '\s' and '\S' | 11 // For the whitespace-related functions below, Java's and Javascript's versions of '\s' and '\S' |
11 // are different. E.g. java doesn't recognize &nbsp; in a text node as whitespace but | 12 // are different. E.g. java doesn't recognize &nbsp; in a text node as whitespace but |
12 // javascript does. The former causes GWT tests to fail; the latter is what we want. | 13 // javascript does. The former causes GWT tests to fail; the latter is what we want. |
13 // Don't use the "g" global search flag, or subsequent searches, even with different Character | 14 // Don't use the "g" global search flag, or subsequent searches, even with different Character |
14 // or String, become unpredictable. | 15 // or String, become unpredictable. |
15 | 16 |
16 public static native boolean isWhitespace(Character c) /*-{ | 17 public static native boolean isWhitespace(Character c) /*-{ |
(...skipping 18 matching lines...) Expand all Loading... | |
35 } | 36 } |
36 | 37 |
37 public static boolean match(String input, String regex) { | 38 public static boolean match(String input, String regex) { |
38 return RegExp.compile(regex, "i").test(input); | 39 return RegExp.compile(regex, "i").test(input); |
39 } | 40 } |
40 | 41 |
41 public static String findAndReplace(String input, String regex, String repla ce) { | 42 public static String findAndReplace(String input, String regex, String repla ce) { |
42 return RegExp.compile(regex, "gi").replace(input, replace); | 43 return RegExp.compile(regex, "gi").replace(input, replace); |
43 } | 44 } |
44 | 45 |
45 public static native boolean containsWordCharacter(String s) /*-{ | 46 /** |
46 return /[\w\u00C0-\u1FFF\u2C00-\uD7FF]/.test(s); | 47 * For some languages, counting the number of words relies on non-trivial word
47 }-*/; | 48 * segmentation algorithms, or even huge look-up tables. This function needs to |
49 * be reasonably fast, so the word count for some languages would only be an | |
50 * approximation. | |
51 * Read https://crbug.com/484750 for more info. | |
52 */ | |
53 private static interface CountWords { | |
cjhopman
2015/05/29 19:38:57
nit: probably should be s/CountWords/WordCounter
wychen
2015/05/31 09:04:03
Done.
| |
54 public int countWords(String s); | |
55 } | |
48 | 56 |
49 public static native int countWords(String s) /*-{ | 57 private static class FullWordCounting implements CountWords { |
50 var m = s.match(/(\S*[\w\u00C0-\u1FFF\u2C00-\uD7FF]\S*)/g); | 58 public native int countWords(String s) /*-{ |
51 return m ? m.length : 0; | 59 // The following range includes broader alphabetical letters and Hangul Syllables.
52 }-*/; | 60 var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g); |
61 var c = (m ? m.length : 0); | |
62 // The following range includes Hiragana, Katakana, and CJK Unified Ideographs. | |
63 // Hangul Syllables are not included. | |
64 m = s.match(/([\u3040-\uA4CF])/g); | |
65 c += Math.ceil((m ? m.length : 0) * 0.55); | |
66 return c; | |
67 }-*/; | |
68 } | |
69 | |
70 private static class LetterWordCounting implements CountWords { | |
71 public native int countWords(String s) /*-{ | |
72 // The following range includes broader alphabetical letters and Hangul Syllables. | |
73 var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g); | |
74 return (m ? m.length : 0); | |
75 }-*/; | |
76 } | |
77 | |
78 private static class FastWordCounting implements CountWords { | |
79 public native int countWords(String s) /*-{ | |
80 // The following range includes broader alphabetical letters. | |
81 var m = s.match(/(\S*[\w\u00C0-\u1FFF]\S*)/g); | |
82 return (m ? m.length : 0); | |
83 }-*/; | |
84 } | |
85 | |
86 public static void selectCountWordsFunc(String text) { | |
87 final RegExp rFull = RegExp.compile("[\\u3040-\\uA4CF]", "g"); | |
88 final RegExp rLetter = RegExp.compile("[\\uAC00-\\uD7AF]", "g"); | |
89 | |
90 if (rFull.test(text)) { | |
91 _countWords = new FullWordCounting(); | |
92 } else if (rLetter.test(text)) { | |
93 _countWords = new LetterWordCounting(); | |
94 } else { | |
95 _countWords = new FastWordCounting(); | |
96 } | |
97 } | |
98 | |
99 // Use the safest version of countWords as the default. | |
100 static CountWords _countWords = new FullWordCounting(); | |
cjhopman
2015/05/29 19:38:57
s/_countWords/sCountWords
in fact, I'd probably c
wychen
2015/05/31 09:04:03
Done.
| |
101 | |
102 public static int countWords(String s) { | |
103 return _countWords.countWords(s); | |
104 }; | |
53 | 105 |
54 public static native String regexEscape(String s) /*-{ | 106 public static native String regexEscape(String s) /*-{ |
55 return s.replace(/[\-\[\]{}()*+?.,\\\^$|#\s]/g, "\\$&"); | 107 return s.replace(/[\-\[\]{}()*+?.,\\\^$|#\s]/g, "\\$&"); |
56 }-*/; | 108 }-*/; |
57 | 109 |
58 /* | 110 /* |
59 * Returns true if character is a digit. | 111 * Returns true if character is a digit. |
60 */ | 112 */ |
61 public static native boolean isDigit(Character c) /*-{ | 113 public static native boolean isDigit(Character c) /*-{ |
62 return /\d/.test(c); | 114 return /\d/.test(c); |
(...skipping 16 matching lines...) Expand all Loading... | |
79 /** | 131 /** |
80 * Returns the plain number if given string can be converted to one >= 0. | 132 * Returns the plain number if given string can be converted to one >= 0. |
81 * Returns -1 if string is empty or not all digits. | 133 * Returns -1 if string is empty or not all digits. |
82 */ | 134 */ |
83 public static int toNumber(String s) { | 135 public static int toNumber(String s) { |
84 if (s.isEmpty() || !StringUtil.isStringAllDigits(s)) return -1; | 136 if (s.isEmpty() || !StringUtil.isStringAllDigits(s)) return -1; |
85 return JavaScript.parseInt(s, 10); | 137 return JavaScript.parseInt(s, 10); |
86 } | 138 } |
87 | 139 |
88 } | 140 } |
OLD | NEW |