| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import com.google.gwt.core.client.JavaScriptObject; |
| 7 import com.google.gwt.regexp.shared.RegExp; | 8 import com.google.gwt.regexp.shared.RegExp; |
| 8 | 9 |
| 9 public class StringUtil { | 10 public class StringUtil { |
| 10 // For the whitespace-related functions below, Java's and Javascript's versi
ons of '\s' and '\S' | 11 // For the whitespace-related functions below, Java's and Javascript's versi
ons of '\s' and '\S' |
| 11 // are different. E.g. java doesn't recognize &nbsp; in a text node as whit
espace but | 12 // are different. E.g. java doesn't recognize &nbsp; in a text node as whit
espace but |
| 12 // javascript does. The former causes GWT tests to fail; the latter is what
we want. | 13 // javascript does. The former causes GWT tests to fail; the latter is what
we want. |
| 13 // Don't use the "g" global search flag, or subsequent searches, even with d
ifferent Character | 14 // Don't use the "g" global search flag, or subsequent searches, even with d
ifferent Character |
| 14 // or String, become unpredictable. | 15 // or String, become unpredictable. |
| 15 | 16 |
| 16 public static native boolean isWhitespace(Character c) /*-{ | 17 public static native boolean isWhitespace(Character c) /*-{ |
| (...skipping 18 matching lines...) |
| 35 } | 36 } |
| 36 | 37 |
| 37 public static boolean match(String input, String regex) { | 38 public static boolean match(String input, String regex) { |
| 38 return RegExp.compile(regex, "i").test(input); | 39 return RegExp.compile(regex, "i").test(input); |
| 39 } | 40 } |
| 40 | 41 |
| 41 public static String findAndReplace(String input, String regex, String repla
ce) { | 42 public static String findAndReplace(String input, String regex, String repla
ce) { |
| 42 return RegExp.compile(regex, "gi").replace(input, replace); | 43 return RegExp.compile(regex, "gi").replace(input, replace); |
| 43 } | 44 } |
| 44 | 45 |
| 45 public static native boolean containsWordCharacter(String s) /*-{ | 46 /** |
| 46 return /[\w\u00C0-\u1FFF\u2C00-\uD7FF]/.test(s); | 47 * For some languages, counting the number of words relies on non-trivial wo
rd |
| 47 }-*/; | 48 * segmentation algorithms, or even huge look-up tables. This function needs
to |
| 49 * be reasonably fast, so the word count for some languages would only be an |
| 50 * approximation. |
| 51 * Read https://crbug.com/484750 for more info. |
| 52 */ |
| 53 public static interface WordCounter { |
| 54 public int count(String s); |
| 55 } |
| 48 | 56 |
| 49 public static native int countWords(String s) /*-{ | 57 public static class FullWordCounter implements WordCounter { |
| 50 var m = s.match(/(\S*[\w\u00C0-\u1FFF\u2C00-\uD7FF]\S*)/g); | 58 public native int count(String s) /*-{ |
| 51 return m ? m.length : 0; | 59 // The following range includes broader alphabetical letters and Han
gul Syllables. |
| 52 }-*/; | 60 var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g); |
| 61 var c = (m ? m.length : 0); |
| 62 // The following range includes Hiragana, Katakana, and CJK Unified
Ideographs. |
| 63 // Hangul Syllables are not included. |
| 64 m = s.match(/([\u3040-\uA4CF])/g); |
| 65 c += Math.ceil((m ? m.length : 0) * 0.55); |
| 66 return c; |
| 67 }-*/; |
| 68 } |
| 69 |
| 70 public static class LetterWordCounter implements WordCounter { |
| 71 public native int count(String s) /*-{ |
| 72 // The following range includes broader alphabetical letters and Han
gul Syllables. |
| 73 var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g); |
| 74 return (m ? m.length : 0); |
| 75 }-*/; |
| 76 } |
| 77 |
| 78 public static class FastWordCounter implements WordCounter { |
| 79 public native int count(String s) /*-{ |
| 80 // The following range includes broader alphabetical letters. |
| 81 var m = s.match(/(\S*[\w\u00C0-\u1FFF]\S*)/g); |
| 82 return (m ? m.length : 0); |
| 83 }-*/; |
| 84 } |
| 85 |
| 86 public static void setWordCounter(String text) { |
| 87 sWordCounter = selectWordCounter(text); |
| 88 } |
| 89 |
| 90 public static WordCounter selectWordCounter(String text) { |
| 91 final RegExp rFull = RegExp.compile("[\\u3040-\\uA4CF]", "g"); |
| 92 final RegExp rLetter = RegExp.compile("[\\uAC00-\\uD7AF]", "g"); |
| 93 |
| 94 if (rFull.test(text)) { |
| 95 return new FullWordCounter(); |
| 96 } else if (rLetter.test(text)) { |
| 97 return new LetterWordCounter(); |
| 98 } else { |
| 99 return new FastWordCounter(); |
| 100 } |
| 101 } |
| 102 |
| 103 // Use the safest version of WordCounter as the default. |
| 104 static WordCounter sWordCounter = new FullWordCounter(); |
| 105 |
| 106 public static int countWords(String s) { |
| 107 return sWordCounter.count(s); |
| 108 }; |
| 53 | 109 |
| 54 public static native String regexEscape(String s) /*-{ | 110 public static native String regexEscape(String s) /*-{ |
| 55 return s.replace(/[\-\[\]{}()*+?.,\\\^$|#\s]/g, "\\$&"); | 111 return s.replace(/[\-\[\]{}()*+?.,\\\^$|#\s]/g, "\\$&"); |
| 56 }-*/; | 112 }-*/; |
| 57 | 113 |
| 58 /* | 114 /* |
| 59 * Returns true if character is a digit. | 115 * Returns true if character is a digit. |
| 60 */ | 116 */ |
| 61 public static native boolean isDigit(Character c) /*-{ | 117 public static native boolean isDigit(Character c) /*-{ |
| 62 return /\d/.test(c); | 118 return /\d/.test(c); |
| (...skipping 16 matching lines...) |
| 79 /** | 135 /** |
| 80 * Returns the plain number if given string can be converted to one >= 0. | 136 * Returns the plain number if given string can be converted to one >= 0. |
| 81 * Returns -1 if string is empty or not all digits. | 137 * Returns -1 if string is empty or not all digits. |
| 82 */ | 138 */ |
| 83 public static int toNumber(String s) { | 139 public static int toNumber(String s) { |
| 84 if (s.isEmpty() || !StringUtil.isStringAllDigits(s)) return -1; | 140 if (s.isEmpty() || !StringUtil.isStringAllDigits(s)) return -1; |
| 85 return JavaScript.parseInt(s, 10); | 141 return JavaScript.parseInt(s, 10); |
| 86 } | 142 } |
| 87 | 143 |
| 88 } | 144 } |
| OLD | NEW |