| OLD | NEW |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.chrome.browser; | 5 package org.chromium.chrome.browser; |
| 6 | 6 |
| 7 import android.os.SystemClock; | 7 import android.os.SystemClock; |
| 8 import android.util.LruCache; | 8 import android.util.LruCache; |
| 9 import android.webkit.URLUtil; | 9 import android.webkit.URLUtil; |
| 10 | 10 |
| 11 import org.chromium.base.Callback; | 11 import org.chromium.base.Callback; |
| 12 import org.chromium.base.SysUtils; | 12 import org.chromium.base.SysUtils; |
| 13 import org.chromium.base.VisibleForTesting; | 13 import org.chromium.base.VisibleForTesting; |
| 14 import org.chromium.base.metrics.RecordHistogram; |
| 14 import org.chromium.blink.mojom.document_metadata.CopylessPaste; | 15 import org.chromium.blink.mojom.document_metadata.CopylessPaste; |
| 15 import org.chromium.blink.mojom.document_metadata.WebPage; | 16 import org.chromium.blink.mojom.document_metadata.WebPage; |
| 16 import org.chromium.chrome.browser.historyreport.AppIndexingReporter; | 17 import org.chromium.chrome.browser.historyreport.AppIndexingReporter; |
| 17 import org.chromium.chrome.browser.tab.Tab; | 18 import org.chromium.chrome.browser.tab.Tab; |
| 18 import org.chromium.content_public.browser.RenderFrameHost; | 19 import org.chromium.content_public.browser.RenderFrameHost; |
| 19 import org.chromium.content_public.browser.WebContents; | 20 import org.chromium.content_public.browser.WebContents; |
| 20 import org.chromium.services.service_manager.InterfaceProvider; | 21 import org.chromium.services.service_manager.InterfaceProvider; |
| 21 | 22 |
| 22 /** | 23 /** |
| 23 * This is the top-level CopylessPaste metadata extraction for AppIndexing. | 24 * This is the top-level CopylessPaste metadata extraction for AppIndexing. |
| 24 */ | 25 */ |
| 25 public class AppIndexingUtil { | 26 public class AppIndexingUtil { |
| 26 private static final int CACHE_SIZE = 100; | 27 private static final int CACHE_SIZE = 100; |
| 27 private static final int CACHE_VISIT_CUTOFF_MS = 60 * 60 * 1000; // 1 hour | 28 private static final int CACHE_VISIT_CUTOFF_MS = 60 * 60 * 1000; // 1 hour |
| 28 // Cache of recently seen urls. If a url is among the CACHE_SIZE most recent
pages visited, and | 29 // Cache of recently seen urls. If a url is among the CACHE_SIZE most recent
pages visited, and |
| 29 // the parse was in the last CACHE_VISIT_CUTOFF_MS milliseconds, then we don
't parse the page, | 30 // the parse was in the last CACHE_VISIT_CUTOFF_MS milliseconds, then we don
't parse the page, |
| 30 // and instead just report the view (not the content) to App Indexing. | 31 // and instead just report the view (not the content) to App Indexing. |
| 31 private LruCache<String, CacheEntry> mPageCache; | 32 private LruCache<String, CacheEntry> mPageCache; |
| 32 | 33 |
| 33 private static Callback<WebPage> sCallbackForTesting; | 34 private static Callback<WebPage> sCallbackForTesting; |
| 34 | 35 |
| 36 // Constants used to log UMA "enum" histograms about the cache state. |
| 37 // The values should not be changed or reused, and CACHE_HISTOGRAM_BOUNDARY
should be the last. |
| 38 private static final int CACHE_HIT_WITH_ENTITY = 0; |
| 39 private static final int CACHE_HIT_WITHOUT_ENTITY = 1; |
| 40 private static final int CACHE_MISS = 2; |
| 41 private static final int CACHE_HISTOGRAM_BOUNDARY = 3; |
| 42 |
| 35 /** | 43 /** |
| 36 * Extracts entities from document metadata and reports it to on-device App
Indexing. | 44 * Extracts entities from document metadata and reports it to on-device App
Indexing. |
| 37 * This call can cache entities from recently parsed webpages, in which case
, only the url and | 45 * This call can cache entities from recently parsed webpages, in which case
, only the url and |
| 38 * title of the page are reported to App Indexing. | 46 * title of the page are reported to App Indexing. |
| 39 */ | 47 */ |
| 40 public void extractCopylessPasteMetadata(final Tab tab) { | 48 public void extractCopylessPasteMetadata(final Tab tab) { |
| 41 final String url = tab.getUrl(); | 49 final String url = tab.getUrl(); |
| 42 boolean isHttpOrHttps = URLUtil.isHttpsUrl(url) || URLUtil.isHttpUrl(url
); | 50 boolean isHttpOrHttps = URLUtil.isHttpsUrl(url) || URLUtil.isHttpUrl(url
); |
| 43 if (!isEnabledForDevice() || tab.isIncognito() || !isHttpOrHttps) { | 51 if (!isEnabledForDevice() || tab.isIncognito() || !isHttpOrHttps) { |
| 44 return; | 52 return; |
| 45 } | 53 } |
| 46 | 54 |
| 47 // There are three conditions that can occur with respect to the cache. | 55 // There are three conditions that can occur with respect to the cache. |
| 48 // 1. Cache hit, and an entity was found previously. Report only the pag
e view to App | 56 // 1. Cache hit, and an entity was found previously. Report only the pag
e view to App |
| 49 // Indexing. | 57 // Indexing. |
| 50 // 2. Cache hit, but no entity was found. Ignore. | 58 // 2. Cache hit, but no entity was found. Ignore. |
| 51 // 3. Cache miss, we need to parse the page. | 59 // 3. Cache miss, we need to parse the page. |
| 52 if (wasPageVisitedRecently(url)) { | 60 if (wasPageVisitedRecently(url)) { |
| 53 if (lastPageVisitContainedEntity(url)) { | 61 if (lastPageVisitContainedEntity(url)) { |
| 54 // Condition 1 | 62 // Condition 1 |
| 63 RecordHistogram.recordEnumeratedHistogram( |
| 64 "CopylessPaste.CacheHit", CACHE_HIT_WITH_ENTITY, CACHE_H
ISTOGRAM_BOUNDARY); |
| 55 getAppIndexingReporter().reportWebPageView(url, tab.getTitle()); | 65 getAppIndexingReporter().reportWebPageView(url, tab.getTitle()); |
| 66 return; |
| 56 } | 67 } |
| 57 // Condition 2 | 68 // Condition 2 |
| 69 RecordHistogram.recordEnumeratedHistogram( |
| 70 "CopylessPaste.CacheHit", CACHE_HIT_WITHOUT_ENTITY, CACHE_HI
STOGRAM_BOUNDARY); |
| 58 } else { | 71 } else { |
| 59 // Condition 3 | 72 // Condition 3 |
| 73 RecordHistogram.recordEnumeratedHistogram( |
| 74 "CopylessPaste.CacheHit", CACHE_MISS, CACHE_HISTOGRAM_BOUNDA
RY); |
| 60 CopylessPaste copylessPaste = getCopylessPasteInterface(tab); | 75 CopylessPaste copylessPaste = getCopylessPasteInterface(tab); |
| 61 if (copylessPaste == null) { | 76 if (copylessPaste == null) { |
| 62 return; | 77 return; |
| 63 } | 78 } |
| 64 copylessPaste.getEntities(new CopylessPaste.GetEntitiesResponse() { | 79 copylessPaste.getEntities(new CopylessPaste.GetEntitiesResponse() { |
| 65 @Override | 80 @Override |
| 66 public void call(WebPage webpage) { | 81 public void call(WebPage webpage) { |
| 67 putCacheEntry(url, webpage != null); | 82 putCacheEntry(url, webpage != null); |
| 68 if (sCallbackForTesting != null) { | 83 if (sCallbackForTesting != null) { |
| 69 sCallbackForTesting.onResult(webpage); | 84 sCallbackForTesting.onResult(webpage); |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 147 mPageCache = new LruCache<String, CacheEntry>(CACHE_SIZE); | 162 mPageCache = new LruCache<String, CacheEntry>(CACHE_SIZE); |
| 148 } | 163 } |
| 149 return mPageCache; | 164 return mPageCache; |
| 150 } | 165 } |
| 151 | 166 |
| 152 private static class CacheEntry { | 167 private static class CacheEntry { |
| 153 public long lastSeenTimeMs; | 168 public long lastSeenTimeMs; |
| 154 public boolean containedEntity; | 169 public boolean containedEntity; |
| 155 } | 170 } |
| 156 } | 171 } |
| OLD | NEW |