| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/chrome_render_view_observer.h" | 5 #include "chrome/renderer/chrome_render_view_observer.h" |
| 6 | 6 |
| 7 #include "base/bind.h" | 7 #include "base/bind.h" |
| 8 #include "base/bind_helpers.h" | 8 #include "base/bind_helpers.h" |
| 9 #include "base/command_line.h" | 9 #include "base/command_line.h" |
| 10 #include "base/debug/trace_event.h" | 10 #include "base/debug/trace_event.h" |
| (...skipping 893 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 904 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the | 904 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the |
| 905 // translate helper for language detection and possible translation. | 905 // translate helper for language detection and possible translation. |
| 906 string16 contents; | 906 string16 contents; |
| 907 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); | 907 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); |
| 908 CaptureText(main_frame, &contents); | 908 CaptureText(main_frame, &contents); |
| 909 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, | 909 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, |
| 910 base::TimeTicks::Now() - capture_begin_time); | 910 base::TimeTicks::Now() - capture_begin_time); |
| 911 if (translate_helper_) | 911 if (translate_helper_) |
| 912 translate_helper_->PageCaptured(page_id, contents); | 912 translate_helper_->PageCaptured(page_id, contents); |
| 913 | 913 |
| 914 // TODO(shess): Is the indexing below the "Full text search" indexing? |
| 915 // If so, more of this code can be removed. |
| 914 // Skip indexing if this is not a new load. Note that the case where | 916 // Skip indexing if this is not a new load. Note that the case where |
| 915 // page_id == last_indexed_page_id_ is more complicated, since we need to | 917 // page_id == last_indexed_page_id_ is more complicated, since we need to |
| 916 // reindex if the toplevel URL has changed (such as from a redirect), even | 918 // reindex if the toplevel URL has changed (such as from a redirect), even |
| 917 // though this may not cause the page id to be incremented. | 919 // though this may not cause the page id to be incremented. |
| 918 if (page_id < last_indexed_page_id_) | 920 if (page_id < last_indexed_page_id_) |
| 919 return; | 921 return; |
| 920 | 922 |
| 921 bool same_page_id = last_indexed_page_id_ == page_id; | 923 bool same_page_id = last_indexed_page_id_ == page_id; |
| 922 if (!preliminary_capture) | 924 if (!preliminary_capture) |
| 923 last_indexed_page_id_ = page_id; | 925 last_indexed_page_id_ = page_id; |
| (...skipping 13 matching lines...) Expand all Loading... |
| 937 // so these will trigger a reindex. | 939 // so these will trigger a reindex. |
| 938 GURL stripped_url(StripRef(url)); | 940 GURL stripped_url(StripRef(url)); |
| 939 if (same_page_id && stripped_url == last_indexed_url_) | 941 if (same_page_id && stripped_url == last_indexed_url_) |
| 940 return; | 942 return; |
| 941 | 943 |
| 942 if (!preliminary_capture) | 944 if (!preliminary_capture) |
| 943 last_indexed_url_ = stripped_url; | 945 last_indexed_url_ = stripped_url; |
| 944 | 946 |
| 945 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); | 947 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); |
| 946 | 948 |
| 947 if (contents.size()) { | |
| 948 // Send the text to the browser for indexing (the browser might decide not | |
| 949 // to index, if the URL is HTTPS for instance). | |
| 950 Send(new ChromeViewHostMsg_PageContents(routing_id(), url, contents)); | |
| 951 } | |
| 952 | |
| 953 #if defined(FULL_SAFE_BROWSING) | 949 #if defined(FULL_SAFE_BROWSING) |
| 954 // Will swap out the string. | 950 // Will swap out the string. |
| 955 if (phishing_classifier_) | 951 if (phishing_classifier_) |
| 956 phishing_classifier_->PageCaptured(&contents, preliminary_capture); | 952 phishing_classifier_->PageCaptured(&contents, preliminary_capture); |
| 957 #endif | 953 #endif |
| 958 } | 954 } |
| 959 | 955 |
| 960 void ChromeRenderViewObserver::CaptureText(WebFrame* frame, | 956 void ChromeRenderViewObserver::CaptureText(WebFrame* frame, |
| 961 string16* contents) { | 957 string16* contents) { |
| 962 contents->clear(); | 958 contents->clear(); |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1031 WebElement element = node.to<WebElement>(); | 1027 WebElement element = node.to<WebElement>(); |
| 1032 if (!element.hasTagName(tag_name)) | 1028 if (!element.hasTagName(tag_name)) |
| 1033 continue; | 1029 continue; |
| 1034 WebString value = element.getAttribute(attribute_name); | 1030 WebString value = element.getAttribute(attribute_name); |
| 1035 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) | 1031 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) |
| 1036 continue; | 1032 continue; |
| 1037 return true; | 1033 return true; |
| 1038 } | 1034 } |
| 1039 return false; | 1035 return false; |
| 1040 } | 1036 } |
| OLD | NEW |