OLD | NEW |
---|---|
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.chrome.browser.contextualsearch; | 5 package org.chromium.chrome.browser.contextualsearch; |
6 | 6 |
7 import android.annotation.SuppressLint; | 7 import android.annotation.SuppressLint; |
8 import android.os.Build; | |
8 import android.text.TextUtils; | 9 import android.text.TextUtils; |
9 | 10 |
10 import org.chromium.base.annotations.CalledByNative; | 11 import org.chromium.base.annotations.CalledByNative; |
11 | 12 |
12 import javax.annotation.Nullable; | 13 import javax.annotation.Nullable; |
13 | 14 |
14 /** | 15 /** |
15 * Provides a context in which to search, and links to the native ContextualSearchContext. | 16 * Provides a context in which to search, and links to the native ContextualSearchContext. |
16 * Includes the selection, selection offsets, surrounding page content, etc. | 17 * Includes the selection, selection offsets, surrounding page content, etc. |
17 * Requires an override of #onSelectionChanged to call when a non-empty selection is established | 18 * Requires an override of #onSelectionChanged to call when a non-empty selection is established |
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
268 /** | 269 /** |
269 * Finds the offset of the start of the word that includes the given initial offset. | 270 * Finds the offset of the start of the word that includes the given initial offset. |
270 * The character at the initial offset is not examined, but the one before it is, and scanning | 271 * The character at the initial offset is not examined, but the one before it is, and scanning |
271 * continues on to earlier characters until a non-word character is found. The offset just | 272 * continues on to earlier characters until a non-word character is found. The offset just |
272 * before the non-word character is returned. If the initial offset is a space immediately | 273 * before the non-word character is returned. If the initial offset is a space immediately |
273 * following a word then the start offset of that word is returned. | 274 * following a word then the start offset of that word is returned. |
274 * @param text The text to scan. | 275 * @param text The text to scan. |
275 * @param initial The initial offset to scan before. | 276 * @param initial The initial offset to scan before. |
276 * @return The start of the word that contains the given initial offset, within {@code text}. | 277 * @return The start of the word that contains the given initial offset, within {@code text}. |
277 */ | 278 */ |
278 private int findWordStartOffset(String text, int initial) { | 279 private int findWordStartOffset(String text, int initial) { |
Theresa
2017/06/02 21:51:42
This is just used to generate a ranker signal for
Donn Denman
2017/06/02 22:00:42
Correct, so it's non-critical for the feature.
| |
279 // Scan before, aborting if we hit any ideographic letter. | 280 // Scan before, aborting if we hit any ideographic letter. |
280 for (int offset = initial - 1; offset >= 0; offset--) { | 281 for (int offset = initial - 1; offset >= 0; offset--) { |
281 if (isIdeographicAtIndex(text, offset)) return INVALID_OFFSET; | 282 if (isUnreliableWordBreakAtIndex(text, offset)) return INVALID_OFFSET; |
282 | 283 |
283 if (isWordBreakAtIndex(text, offset)) { | 284 if (isWordBreakAtIndex(text, offset)) { |
284 // The start of the word is after this word break. | 285 // The start of the word is after this word break. |
285 return offset + 1; | 286 return offset + 1; |
286 } | 287 } |
287 } | 288 } |
288 return INVALID_OFFSET; | 289 return INVALID_OFFSET; |
289 } | 290 } |
290 | 291 |
291 /** | 292 /** |
292 * Finds the offset of the end of the word that includes the given initial offset. | 293 * Finds the offset of the end of the word that includes the given initial offset. |
293 * NOTE: this is the index of the character just past the last character of the word, | 294 * NOTE: this is the index of the character just past the last character of the word, |
294 * so a 3 character word "who" has start index 0 and end index 3. | 295 * so a 3 character word "who" has start index 0 and end index 3. |
295 * The character at the initial offset is examined and each one after that too until a non-word | 296 * The character at the initial offset is examined and each one after that too until a non-word |
296 * character is encountered, and that offset will be returned. | 297 * character is encountered, and that offset will be returned. |
297 * @param text The text to scan. | 298 * @param text The text to scan. |
298 * @param initial The initial offset to scan from. | 299 * @param initial The initial offset to scan from. |
299 * @return The end of the word that contains the given initial offset, within {@code text}. | 300 * @return The end of the word that contains the given initial offset, within {@code text}. |
300 */ | 301 */ |
301 private int findWordEndOffset(String text, int initial) { | 302 private int findWordEndOffset(String text, int initial) { |
302 // Scan after, aborting if we hit any CJKN letter. | 303 // Scan after, aborting if we hit any CJKN letter. |
303 for (int offset = initial; offset < text.length(); offset++) { | 304 for (int offset = initial; offset < text.length(); offset++) { |
304 if (isIdeographicAtIndex(text, offset)) return INVALID_OFFSET; | 305 if (isUnreliableWordBreakAtIndex(text, offset)) return INVALID_OFFSET; |
305 | 306 |
306 if (isWordBreakAtIndex(text, offset)) { | 307 if (isWordBreakAtIndex(text, offset)) { |
307 // The end of the word is the offset of this word break. | 308 // The end of the word is the offset of this word break. |
308 return offset; | 309 return offset; |
309 } | 310 } |
310 } | 311 } |
311 return INVALID_OFFSET; | 312 return INVALID_OFFSET; |
312 } | 313 } |
313 | 314 |
314 /** | 315 /** |
315 * @return Whether the character at the given index in the text is "Ideographic" (as in CJKV | 316 * @return Whether the character at the given index in the text might be in an alphabet that has |
316 * languages), which means there may not be reliable word breaks. | 317 * unreliable word breaks, such as CJKV languages. Returns {@code true} on older |
318 * platforms where we can't know for sure. | |
317 */ | 319 */ |
318 @SuppressLint("NewApi") | 320 @SuppressLint("NewApi") |
319 private boolean isIdeographicAtIndex(String text, int index) { | 321 private boolean isUnreliableWordBreakAtIndex(String text, int index) { |
320 return Character.isIdeographic(text.charAt(index)); | 322 if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.KITKAT) { |
323 return Character.isIdeographic(text.charAt(index)); | |
324 } else { | |
325 return true; // Assume the worst if we can't check. | |
326 } | |
321 } | 327 } |
322 | 328 |
323 /** | 329 /** |
324 * @return Whether the character at the given index is a word-break. | 330 * @return Whether the character at the given index is a word-break. |
325 */ | 331 */ |
326 private boolean isWordBreakAtIndex(String text, int index) { | 332 private boolean isWordBreakAtIndex(String text, int index) { |
327 return !Character.isLetterOrDigit(text.charAt(index)) | 333 return !Character.isLetterOrDigit(text.charAt(index)) |
328 && text.codePointAt(index) != SOFT_HYPHEN_CHAR; | 334 && text.codePointAt(index) != SOFT_HYPHEN_CHAR; |
329 } | 335 } |
330 | 336 |
(...skipping 10 matching lines...) Expand all Loading... | |
341 // ============================================================================================ | 347 // ============================================================================================ |
342 // Native methods. | 348 // Native methods. |
343 // ============================================================================================ | 349 // ============================================================================================ |
344 private native long nativeInit(); | 350 private native long nativeInit(); |
345 private native void nativeDestroy(long nativeContextualSearchContext); | 351 private native void nativeDestroy(long nativeContextualSearchContext); |
346 private native void nativeSetResolveProperties( | 352 private native void nativeSetResolveProperties( |
347 long nativeContextualSearchContext, String homeCountry, boolean maySendBasePageUrl); | 353 long nativeContextualSearchContext, String homeCountry, boolean maySendBasePageUrl); |
348 private native void nativeAdjustSelection( | 354 private native void nativeAdjustSelection( |
349 long nativeContextualSearchContext, int startAdjust, int endAdjust); | 355 long nativeContextualSearchContext, int startAdjust, int endAdjust); |
350 } | 356 } |
OLD | NEW |