| Index: java/org/chromium/distiller/StringUtil.java | 
| diff --git a/java/org/chromium/distiller/StringUtil.java b/java/org/chromium/distiller/StringUtil.java | 
| index 00f387644ac3e50ab9ff091d8e21ee4dbc22a23c..b9eafefae1ed0c94f942723898846e2826bd4c9b 100644 | 
| --- a/java/org/chromium/distiller/StringUtil.java | 
| +++ b/java/org/chromium/distiller/StringUtil.java | 
| @@ -4,6 +4,7 @@ | 
|  | 
| package org.chromium.distiller; | 
|  | 
| +import com.google.gwt.core.client.JavaScriptObject; | 
| import com.google.gwt.regexp.shared.RegExp; | 
|  | 
| public class StringUtil { | 
| @@ -42,14 +43,69 @@ public class StringUtil { | 
| return RegExp.compile(regex, "gi").replace(input, replace); | 
| } | 
|  | 
| -    public static native boolean containsWordCharacter(String s) /*-{ | 
| -        return /[\w\u00C0-\u1FFF\u2C00-\uD7FF]/.test(s); | 
| -    }-*/; | 
| +    /** | 
| +     * Counts the words in a string. For some languages an accurate count relies on | 
| +     * non-trivial word segmentation algorithms, or even huge look-up tables. | 
| +     * Counting has to stay reasonably fast, so implementations may return only an | 
| +     * approximation for those languages. | 
| +     * See https://crbug.com/484750 for more info. | 
| +     */ | 
| +    public interface WordCounter { | 
| +        int count(String s); | 
| +    } | 
|  | 
| -    public static native int countWords(String s) /*-{ | 
| -        var m = s.match(/(\S*[\w\u00C0-\u1FFF\u2C00-\uD7FF]\S*)/g); | 
| -        return m ? m.length : 0; | 
| -    }-*/; | 
| +    public static class FullWordCounter implements WordCounter { | 
| +        public native int count(String s) /*-{ | 
| +            // A whitespace-delimited token is one word if it has a character in this range, | 
| +            // which includes broader alphabetical letters and Hangul Syllables. | 
| +            var tokens = s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g); | 
| +            // Hiragana, Katakana, and CJK Unified Ideographs (Hangul Syllables are not | 
| +            // included) are matched one character at a time and scaled by 0.55. | 
| +            var glyphs = s.match(/([\u3040-\uA4CF])/g); | 
| +            var wordCount = tokens ? tokens.length : 0; | 
| +            return wordCount + Math.ceil((glyphs ? glyphs.length : 0) * 0.55); | 
| +        }-*/; | 
| +    } | 
| + | 
| +    public static class LetterWordCounter implements WordCounter { | 
| +        public native int count(String s) /*-{ | 
| +            // A whitespace-delimited token is one word if it has a character in this range, | 
| +            // which includes broader alphabetical letters and Hangul Syllables. | 
| +            return (s.match(/(\S*[\w\u00C0-\u1FFF\uAC00-\uD7AF]\S*)/g) || []).length; | 
| +        }-*/; | 
| +    } | 
| + | 
| +    public static class FastWordCounter implements WordCounter { | 
| +        public native int count(String s) /*-{ | 
| +            // A whitespace-delimited token is one word if it has a character in this range, | 
| +            // which includes broader alphabetical letters. | 
| +            return (s.match(/(\S*[\w\u00C0-\u1FFF]\S*)/g) || []).length; | 
| +        }-*/; | 
| +    } | 
| + | 
| +    public static void setWordCounter(String text) { | 
| +        sWordCounter = selectWordCounter(text);  // Used by later countWords() calls. | 
| +    } | 
| + | 
| +    public static WordCounter selectWordCounter(String text) { | 
| +        // Any character in U+3040..U+A4CF calls for the per-character estimate. | 
| +        if (RegExp.compile("[\\u3040-\\uA4CF]", "g").test(text)) { | 
| +            return new FullWordCounter(); | 
| +        } | 
| +        // Any character in U+AC00..U+D7AF calls for the extended letter range. | 
| +        if (RegExp.compile("[\\uAC00-\\uD7AF]", "g").test(text)) { | 
| +            return new LetterWordCounter(); | 
| +        } | 
| +        // Neither range is present, so the cheapest counter is sufficient. | 
| +        return new FastWordCounter(); | 
| +    } | 
| + | 
| +    // Default to FullWordCounter, the safest version, until setWordCounter() replaces it. | 
| +    static WordCounter sWordCounter = new FullWordCounter(); | 
| + | 
| +    public static int countWords(String s) { | 
| +        return sWordCounter.count(s);  // Delegates to the currently installed WordCounter. | 
| +    } | 
|  | 
| public static native String regexEscape(String s) /*-{ | 
| return s.replace(/[\-\[\]{}()*+?.,\\\^$|#\s]/g, "\\$&"); | 
|  |