Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(161)

Side by Side Diff: java/org/chromium/distiller/StringUtil.java

Issue 1131853006: Fix word count issue for Chinese and Japanese (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: address comments Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.regexp.shared.RegExp; 7 import com.google.gwt.regexp.shared.RegExp;
8 8
9 public class StringUtil { 9 public class StringUtil {
10 // For the whitespace-related functions below, Java's and Javascript's versions of '\s' and '\S' 10 // For the whitespace-related functions below, Java's and Javascript's versions of '\s' and '\S'
(...skipping 24 matching lines...) Expand all
35 } 35 }
36 36
37 public static boolean match(String input, String regex) { 37 public static boolean match(String input, String regex) {
38 return RegExp.compile(regex, "i").test(input); 38 return RegExp.compile(regex, "i").test(input);
39 } 39 }
40 40
41 public static String findAndReplace(String input, String regex, String replace) { 41 public static String findAndReplace(String input, String regex, String replace) {
42 return RegExp.compile(regex, "gi").replace(input, replace); 42 return RegExp.compile(regex, "gi").replace(input, replace);
43 } 43 }
44 44
45 public static native boolean containsWordCharacter(String s) /*-{ 45 /**
46 return /[\w\u00C0-\u1FFF\u2C00-\uD7FF]/.test(s); 46 * For some languages, counting the number of words relies on non-trivial word
47 * segmentation algorithms, or even huge look-up tables. This function needs to
48 * be reasonably fast, so the word count for some languages would only be an
49 * approximation.
50 * Read https://crbug.com/484750 for more info.
51 */
52 public static native void selectCountWordsFunc(String text) /*-{
53 if (/[\u3040-\uA4CF]/.test(text)) {
54 $_countWords = function(s) {
cjhopman 2015/05/22 00:11:58 This is still going to modify the global window ob
cjhopman 2015/05/22 00:13:29 Oh, I see what you did. Yeah, we shouldn't add glo
wychen 2015/05/23 00:32:27 Found a workaround. PTAL.
55 // The following range includes broader alphabetical letters and Hangul Syllables.
56 var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g);
57 var c = (m ? m.length : 0);
58 // The following range includes Hiragana, Katakana, and CJK Unified Ideographs.
59 // Hangul Syllables are not included.
60 m = s.match(/([\u3040-\uA4CF])/g);
61 c += Math.ceil((m ? m.length : 0) * 0.55);
62 return c;
63 };
64 return;
65 }
66 if (/[\uAC00-\uD7AF]/.test(text)) {
67 $_countWords = function(s) {
68 // The following range includes broader alphabetical letters and Hangul Syllables.
69 var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g);
70 return (m ? m.length : 0);
71 };
72 return;
73 }
74 $_countWords = function(s) {
75 // The following range includes broader alphabetical letters.
76 var m = s.match(/(\S*[\w\u00C0-\u1FFF]\S*)/g);
77 return (m ? m.length : 0);
78 };
47 }-*/; 79 }-*/;
48 80
81 private static final int __dummy_setCountWords = initCountWords();
82 private static int initCountWords() {
83 selectCountWordsFunc("字");
84 return 0;
85 }
86
49 public static native int countWords(String s) /*-{ 87 public static native int countWords(String s) /*-{
50 var m = s.match(/(\S*[\w\u00C0-\u1FFF\u2C00-\uD7FF]\S*)/g); 88 return $_countWords(s);
51 return m ? m.length : 0;
52 }-*/; 89 }-*/;
53 90
54 public static native String regexEscape(String s) /*-{ 91 public static native String regexEscape(String s) /*-{
55 return s.replace(/[\-\[\]{}()*+?.,\\\^$|#\s]/g, "\\$&"); 92 return s.replace(/[\-\[\]{}()*+?.,\\\^$|#\s]/g, "\\$&");
56 }-*/; 93 }-*/;
57 94
58 /* 95 /*
59 * Returns true if character is a digit. 96 * Returns true if character is a digit.
60 */ 97 */
61 public static native boolean isDigit(Character c) /*-{ 98 public static native boolean isDigit(Character c) /*-{
(...skipping 17 matching lines...) Expand all
79 /** 116 /**
80 * Returns the plain number if given string can be converted to one >= 0. 117 * Returns the plain number if given string can be converted to one >= 0.
81 * Returns -1 if string is empty or not all digits. 118 * Returns -1 if string is empty or not all digits.
82 */ 119 */
83 public static int toNumber(String s) { 120 public static int toNumber(String s) {
84 if (s.isEmpty() || !StringUtil.isStringAllDigits(s)) return -1; 121 if (s.isEmpty() || !StringUtil.isStringAllDigits(s)) return -1;
85 return JavaScript.parseInt(s, 10); 122 return JavaScript.parseInt(s, 10);
86 } 123 }
87 124
88 } 125 }
OLDNEW
« no previous file with comments | « java/org/chromium/distiller/DomDistiller.java ('k') | java/org/chromium/distiller/dev/DistillerScriptLinker.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698