OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.core.client.JavaScriptObject; | |
8 import com.google.gwt.regexp.shared.RegExp; | 7 import com.google.gwt.regexp.shared.RegExp; |
9 | 8 |
10 public class StringUtil { | 9 public class StringUtil { |
11 // For the whitespace-related functions below, Java's and Javascript's versi
ons of '\s' and '\S' | 10 // For the whitespace-related functions below, Java's and Javascript's versi
ons of '\s' and '\S' |
12 // are different. E.g. java doesn't recognize &nbsp; in a text node as whit
espace but | 11 // are different. E.g. java doesn't recognize &nbsp; in a text node as whit
espace but |
13 // javascript does. The former causes GWT tests to fail; the latter is what
we want. | 12 // javascript does. The former causes GWT tests to fail; the latter is what
we want. |
14 // Don't use the "g" global search flag, or subsequent searches, even with d
ifferent Character | 13 // Don't use the "g" global search flag, or subsequent searches, even with d
ifferent Character |
15 // or String, become unpredictable. | 14 // or String, become unpredictable. |
16 | 15 |
17 public static native boolean isWhitespace(Character c) /*-{ | 16 public static native boolean isWhitespace(Character c) /*-{ |
(...skipping 38 matching lines...) |
56 * segmentation algorithms, or even huge look-up tables. This function needs
to | 55 * segmentation algorithms, or even huge look-up tables. This function needs
to |
57 * be reasonably fast, so the word count for some languages would only be an | 56 * be reasonably fast, so the word count for some languages would only be an |
58 * approximation. | 57 * approximation. |
59 * Read https://crbug.com/484750 for more info. | 58 * Read https://crbug.com/484750 for more info. |
60 */ | 59 */ |
61 public static interface WordCounter { | 60 public static interface WordCounter { |
62 public int count(String s); | 61 public int count(String s); |
63 } | 62 } |
64 | 63 |
65 public static class FullWordCounter implements WordCounter { | 64 public static class FullWordCounter implements WordCounter { |
| 65 @Override |
66 public native int count(String s) /*-{ | 66 public native int count(String s) /*-{ |
67 // The following range includes broader alphabetical letters and Han
gul Syllables. | 67 // The following range includes broader alphabetical letters and Han
gul Syllables. |
68 var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g); | 68 var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g); |
69 var c = (m ? m.length : 0); | 69 var c = (m ? m.length : 0); |
70 // The following range includes Hiragana, Katakana, and CJK Unified
Ideographs. | 70 // The following range includes Hiragana, Katakana, and CJK Unified
Ideographs. |
71 // Hangul Syllables are not included. | 71 // Hangul Syllables are not included. |
72 m = s.match(/([\u3040-\uA4CF])/g); | 72 m = s.match(/([\u3040-\uA4CF])/g); |
73 c += Math.ceil((m ? m.length : 0) * 0.55); | 73 c += Math.ceil((m ? m.length : 0) * 0.55); |
74 return c; | 74 return c; |
75 }-*/; | 75 }-*/; |
76 } | 76 } |
77 | 77 |
78 public static class LetterWordCounter implements WordCounter { | 78 public static class LetterWordCounter implements WordCounter { |
| 79 @Override |
79 public native int count(String s) /*-{ | 80 public native int count(String s) /*-{ |
80 // The following range includes broader alphabetical letters and Han
gul Syllables. | 81 // The following range includes broader alphabetical letters and Han
gul Syllables. |
81 var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g); | 82 var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g); |
82 return (m ? m.length : 0); | 83 return (m ? m.length : 0); |
83 }-*/; | 84 }-*/; |
84 } | 85 } |
85 | 86 |
86 public static class FastWordCounter implements WordCounter { | 87 public static class FastWordCounter implements WordCounter { |
| 88 @Override |
87 public native int count(String s) /*-{ | 89 public native int count(String s) /*-{ |
88 // The following range includes broader alphabetical letters. | 90 // The following range includes broader alphabetical letters. |
89 var m = s.match(/(\S*[\w\u00C0-\u1FFF]\S*)/g); | 91 var m = s.match(/(\S*[\w\u00C0-\u1FFF]\S*)/g); |
90 return (m ? m.length : 0); | 92 return (m ? m.length : 0); |
91 }-*/; | 93 }-*/; |
92 } | 94 } |
93 | 95 |
94 public static void setWordCounter(String text) { | 96 public static void setWordCounter(String text) { |
95 sWordCounter = selectWordCounter(text); | 97 sWordCounter = selectWordCounter(text); |
96 } | 98 } |
97 | 99 |
98 public static WordCounter selectWordCounter(String text) { | 100 public static WordCounter selectWordCounter(String text) { |
99 final RegExp rFull = RegExp.compile("[\\u3040-\\uA4CF]", "g"); | 101 final RegExp rFull = RegExp.compile("[\\u3040-\\uA4CF]", "g"); |
100 final RegExp rLetter = RegExp.compile("[\\uAC00-\\uD7AF]", "g"); | 102 final RegExp rLetter = RegExp.compile("[\\uAC00-\\uD7AF]", "g"); |
101 | 103 |
102 if (rFull.test(text)) { | 104 if (rFull.test(text)) { |
103 return new FullWordCounter(); | 105 return new FullWordCounter(); |
104 } else if (rLetter.test(text)) { | 106 } else if (rLetter.test(text)) { |
105 return new LetterWordCounter(); | 107 return new LetterWordCounter(); |
106 } else { | 108 } else { |
107 return new FastWordCounter(); | 109 return new FastWordCounter(); |
108 } | 110 } |
109 } | 111 } |
110 | 112 |
111 // Use the safest version of WordCounter as the default. | 113 // Use the safest version of WordCounter as the default. |
112 static WordCounter sWordCounter = new FullWordCounter(); | 114 static WordCounter sWordCounter = new FullWordCounter(); |
113 | 115 |
114 public static int countWords(String s) { | 116 public static int countWords(String s) { |
115 return sWordCounter.count(s); | 117 return sWordCounter.count(s); |
116 }; | 118 } |
117 | 119 |
118 public static native String regexEscape(String s) /*-{ | 120 public static native String regexEscape(String s) /*-{ |
119 return s.replace(/[\-\[\]{}()*+?.,\\\^$|#\s]/g, "\\$&"); | 121 return s.replace(/[\-\[\]{}()*+?.,\\\^$|#\s]/g, "\\$&"); |
120 }-*/; | 122 }-*/; |
121 | 123 |
122 /* | 124 /* |
123 * Returns true if character is a digit. | 125 * Returns true if character is a digit. |
124 */ | 126 */ |
125 public static native boolean isDigit(Character c) /*-{ | 127 public static native boolean isDigit(Character c) /*-{ |
126 return /\d/.test(c); | 128 return /\d/.test(c); |
(...skipping 16 matching lines...) |
143 /** | 145 /** |
144 * Returns the plain number if given string can be converted to one >= 0. | 146 * Returns the plain number if given string can be converted to one >= 0. |
145 * Returns -1 if string is empty or not all digits. | 147 * Returns -1 if string is empty or not all digits. |
146 */ | 148 */ |
147 public static int toNumber(String s) { | 149 public static int toNumber(String s) { |
148 if (s.isEmpty() || !StringUtil.isStringAllDigits(s)) return -1; | 150 if (s.isEmpty() || !StringUtil.isStringAllDigits(s)) return -1; |
149 return JavaScript.parseInt(s, 10); | 151 return JavaScript.parseInt(s, 10); |
150 } | 152 } |
151 | 153 |
152 } | 154 } |
OLD | NEW |