Chromium Code Reviews| Index: java/org/chromium/distiller/StringUtil.java |
| diff --git a/java/org/chromium/distiller/StringUtil.java b/java/org/chromium/distiller/StringUtil.java |
| index 00f387644ac3e50ab9ff091d8e21ee4dbc22a23c..9c0d081434f04a5d239bc651471f0710557bd55c 100644 |
| --- a/java/org/chromium/distiller/StringUtil.java |
| +++ b/java/org/chromium/distiller/StringUtil.java |
| @@ -47,8 +47,14 @@ public class StringUtil { |
| }-*/; |
| public static native int countWords(String s) /*-{ |
|
cjhopman
2015/05/15 20:16:55
Does this new approach make sense everywhere that
cjhopman
2015/05/15 20:16:55
Maybe we should change the name of this to reflect
wychen
2015/05/18 18:49:20
Well, the goal of this function is still to count
wychen
2015/05/18 18:49:20
One problem I see is the title finding part. It co
|
| - var m = s.match(/(\S*[\w\u00C0-\u1FFF\u2C00-\uD7FF]\S*)/g); |
| - return m ? m.length : 0; |
| + // The following ranges cover a broad set of alphabetic scripts (U+00C0-U+1FFF) and Hangul Syllables (U+AC00-U+D7AF). |
| + var m = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g); |
| + var c = (m ? m.length : 0); |
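| + // The following range (U+3040-U+A4CF) includes Hiragana, Katakana, and CJK Unified Ideographs, |
| + // as well as the other blocks that fall in between (e.g. Bopomofo, Yi Syllables). |
| + // Hangul Syllables (U+AC00-U+D7AF) are not included. |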
| + m = s.match(/([\u3040-\uA4CF])/g); |
| + c += Math.ceil((m ? m.length : 0) * 0.55); |
| + return c; |
| }-*/; |
| public static native String regexEscape(String s) /*-{ |