Chromium Code Reviews| Index: chrome/android/java/src/org/chromium/chrome/browser/AppIndexingUtil.java |
| diff --git a/chrome/android/java/src/org/chromium/chrome/browser/AppIndexingUtil.java b/chrome/android/java/src/org/chromium/chrome/browser/AppIndexingUtil.java |
| index 4e4aca058b446417c865412c358189ba45edb456..ef7ff2010ac20f4249b38311cf05e5997cd30e6b 100644 |
| --- a/chrome/android/java/src/org/chromium/chrome/browser/AppIndexingUtil.java |
| +++ b/chrome/android/java/src/org/chromium/chrome/browser/AppIndexingUtil.java |
| @@ -4,9 +4,12 @@ |
| package org.chromium.chrome.browser; |
| +import android.os.SystemClock; |
| +import android.util.LruCache; |
| import android.webkit.URLUtil; |
| import org.chromium.base.SysUtils; |
| +import org.chromium.base.VisibleForTesting; |
| import org.chromium.blink.mojom.document_metadata.CopylessPaste; |
| import org.chromium.blink.mojom.document_metadata.WebPage; |
| import org.chromium.chrome.browser.historyreport.AppIndexingReporter; |
| @@ -19,31 +22,118 @@ import org.chromium.services.service_manager.InterfaceProvider; |
| * This is the top-level CopylessPaste metadata extraction for AppIndexing. |
| */ |
| public class AppIndexingUtil { |
| - public static void extractCopylessPasteMetadata(final Tab tab) { |
| - String url = tab.getUrl(); |
| + private static final int CACHE_SIZE = 100; |
| + private static final int CACHE_VISIT_CUTOFF_MS = 60 * 60 * 1000; // 1 hour |
| + // Cache of recently seen urls. If a url is among the CACHE_SIZE most recent pages visited, and |
| + // the parse was in the last CACHE_VISIT_CUTOFF_MS milliseconds, then we don't parse the page, |
|
wychen
2017/04/13 07:05:00
and the *parsing* was in the last ...
Our expirat
dproctor
2017/04/13 17:23:09
Thanks, yeah, the comparison is made based on the
|
| + // and instead just report the view (not the content) to App Indexing. |
| + private final LruCache<String, CacheEntry> mPageCache; |
| + |
| + public AppIndexingUtil() { |
| + this(CACHE_SIZE); |
| + } |
| + |
| + private AppIndexingUtil(int cacheSize) { |
| + mPageCache = new LruCache<String, CacheEntry>(cacheSize); |
| + } |
| + |
| + public void extractCopylessPasteMetadata(final Tab tab) { |
| + final String url = tab.getUrl(); |
| boolean isHttpOrHttps = URLUtil.isHttpsUrl(url) || URLUtil.isHttpUrl(url); |
| - if (SysUtils.isLowEndDevice() || tab.isIncognito() |
| - || !ChromeFeatureList.isEnabled(ChromeFeatureList.COPYLESS_PASTE) |
| - || !isHttpOrHttps) { |
| + if (!isEnabledForDevice() || tab.isIncognito() || !isHttpOrHttps) { |
| return; |
| } |
| + // There are three conditions that can occur with respect to the cache. |
|
wychen
2017/04/13 07:05:00
We might want to add UMA to track the distribution
dproctor
2017/04/13 17:34:58
How about we add this in a follow-up cl?
wychen
2017/04/13 18:04:45
No problem.
|
| + // 1. Cache hit, and an entity was found previously. Report only the page view to App |
| + // Indexing. |
| + // 2. Cache hit, but no entity was found. Ignore. |
| + // 3. Cache miss, we need to parse the page. |
| + if (wasPageVisitedRecently(url)) { |
| + if (lastPageVisitContainedEntity(url)) { |
| + // Condition 1 |
| + getAppIndexingReporter().reportWebPageView(url, tab.getTitle()); |
| + } |
| + // Condition 2 |
| + } else { |
| + // Condition 3 |
| + CopylessPaste copylessPaste = getCopylessPasteInterface(tab); |
| + if (copylessPaste == null) { |
| + return; |
| + } |
| + copylessPaste.getEntities(new CopylessPaste.GetEntitiesResponse() { |
| + @Override |
| + public void call(WebPage webpage) { |
| + putCacheEntry(url, webpage != null); |
| + if (webpage == null) return; |
| + getAppIndexingReporter().reportWebPage(webpage); |
| + } |
| + }); |
| + } |
| + } |
| + |
| + private boolean wasPageVisitedRecently(String url) { |
| + if (url == null) { |
| + return false; |
| + } |
| + CacheEntry entry = mPageCache.get(url); |
| + if (entry == null || getElapsedTime() - entry.lastSeenTimeMs > CACHE_VISIT_CUTOFF_MS) { |
| + return false; |
| + } |
| + return true; |
| + } |
| + |
| + // Returns true if the page is in the cache and it contained an entity the last time it was |
| + // parsed. |
|
wychen
2017/04/13 07:05:00
it was *parsed*
dproctor
2017/04/13 17:23:09
Done.
|
| + private boolean lastPageVisitContainedEntity(String url) { |
| + if (url == null) { |
| + return false; |
| + } |
| + CacheEntry entry = mPageCache.get(url); |
| + if (entry == null || !entry.containedEntity) { |
| + return false; |
| + } |
| + return true; |
| + } |
| + |
| + private void putCacheEntry(String url, boolean containedEntity) { |
| + CacheEntry entry = new CacheEntry(); |
| + entry.lastSeenTimeMs = getElapsedTime(); |
| + entry.containedEntity = containedEntity; |
| + mPageCache.put(url, entry); |
| + } |
| + |
| + @VisibleForTesting |
| + AppIndexingReporter getAppIndexingReporter() { |
| + return AppIndexingReporter.getInstance(); |
| + } |
| + |
| + @VisibleForTesting |
| + CopylessPaste getCopylessPasteInterface(Tab tab) { |
| WebContents webContents = tab.getWebContents(); |
| - if (webContents == null) return; |
| + if (webContents == null) return null; |
| RenderFrameHost mainFrame = webContents.getMainFrame(); |
| - if (mainFrame == null) return; |
| + if (mainFrame == null) return null; |
| InterfaceProvider interfaces = mainFrame.getRemoteInterfaces(); |
| - if (interfaces == null) return; |
| - |
| - CopylessPaste copylesspaste = interfaces.getInterface(CopylessPaste.MANAGER); |
| - copylesspaste.getEntities(new CopylessPaste.GetEntitiesResponse() { |
| - @Override |
| - public void call(WebPage webpage) { |
| - if (webpage == null) return; |
| - AppIndexingReporter.getInstance().reportWebPage(webpage); |
| - } |
| - }); |
| + if (interfaces == null) return null; |
| + |
| + return interfaces.getInterface(CopylessPaste.MANAGER); |
| + } |
| + |
| + @VisibleForTesting |
| + long getElapsedTime() { |
| + return SystemClock.elapsedRealtime(); |
| + } |
| + |
| + boolean isEnabledForDevice() { |
| + return !SysUtils.isLowEndDevice() |
| + && ChromeFeatureList.isEnabled(ChromeFeatureList.COPYLESS_PASTE); |
| + } |
| + |
| + private static class CacheEntry { |
| + public long lastSeenTimeMs; |
| + public boolean containedEntity; |
| } |
| } |