Index: chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContext.java |
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContext.java b/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContext.java |
index 39f330c99f92492b1a48da8c719ef7f016b82c1a..e8b55e0754770d82955b7be825ce3e0538370b50 100644 |
--- a/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContext.java |
+++ b/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContext.java |
@@ -5,6 +5,7 @@ |
package org.chromium.chrome.browser.contextualsearch; |
import android.annotation.SuppressLint; |
+import android.os.Build; |
import android.text.TextUtils; |
import org.chromium.base.annotations.CalledByNative; |
@@ -278,7 +279,7 @@ public abstract class ContextualSearchContext { |
private int findWordStartOffset(String text, int initial) { |
Theresa
2017/06/02 21:51:42
This is just used to generate a ranker signal for
Donn Denman
2017/06/02 22:00:42
Correct, so it's non-critical for the feature.
|
// Scan before, aborting if we hit any ideographic letter. |
for (int offset = initial - 1; offset >= 0; offset--) { |
- if (isIdeographicAtIndex(text, offset)) return INVALID_OFFSET; |
+ if (isUnreliableWordBreakAtIndex(text, offset)) return INVALID_OFFSET; |
if (isWordBreakAtIndex(text, offset)) { |
// The start of the word is after this word break. |
@@ -301,7 +302,7 @@ public abstract class ContextualSearchContext { |
private int findWordEndOffset(String text, int initial) { |
// Scan after, aborting if we hit any CJKN letter. |
for (int offset = initial; offset < text.length(); offset++) { |
- if (isIdeographicAtIndex(text, offset)) return INVALID_OFFSET; |
+ if (isUnreliableWordBreakAtIndex(text, offset)) return INVALID_OFFSET; |
if (isWordBreakAtIndex(text, offset)) { |
// The end of the word is the offset of this word break. |
@@ -312,12 +313,17 @@ public abstract class ContextualSearchContext { |
} |
/** |
- * @return Whether the character at the given index in the text is "Ideographic" (as in CJKV |
- * languages), which means there may not be reliable word breaks. |
+ * @return Whether the character at the given index in the text might be in an alphabet that has |
+ * unreliable word breaks, such as CJKV languages. Returns {@code true} on older |
+ * platforms where we can't know for sure. |
*/ |
@SuppressLint("NewApi") |
- private boolean isIdeographicAtIndex(String text, int index) { |
- return Character.isIdeographic(text.charAt(index)); |
+ private boolean isUnreliableWordBreakAtIndex(String text, int index) { |
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.KITKAT) { |
+ return Character.isIdeographic(text.charAt(index)); |
+ } else { |
+ return true; // Assume the worst if we can't check. |
+ } |
} |
/** |