OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/chrome_render_view_observer.h" | 5 #include "chrome/renderer/chrome_render_view_observer.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/bind_helpers.h" | 8 #include "base/bind_helpers.h" |
9 #include "base/command_line.h" | 9 #include "base/command_line.h" |
10 #include "base/debug/trace_event.h" | 10 #include "base/debug/trace_event.h" |
(...skipping 893 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
904 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the | 904 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the |
905 // translate helper for language detection and possible translation. | 905 // translate helper for language detection and possible translation. |
906 string16 contents; | 906 string16 contents; |
907 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); | 907 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); |
908 CaptureText(main_frame, &contents); | 908 CaptureText(main_frame, &contents); |
909 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, | 909 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, |
910 base::TimeTicks::Now() - capture_begin_time); | 910 base::TimeTicks::Now() - capture_begin_time); |
911 if (translate_helper_) | 911 if (translate_helper_) |
912 translate_helper_->PageCaptured(page_id, contents); | 912 translate_helper_->PageCaptured(page_id, contents); |
913 | 913 |
| 914 // TODO(shess): Does "indexing" here mean full-text-search indexing? If |
| 915 // so, more of this code can be removed. |
914 // Skip indexing if this is not a new load. Note that the case where | 916 // Skip indexing if this is not a new load. Note that the case where |
915 // page_id == last_indexed_page_id_ is more complicated, since we need to | 917 // page_id == last_indexed_page_id_ is more complicated, since we need to |
916 // reindex if the toplevel URL has changed (such as from a redirect), even | 918 // reindex if the toplevel URL has changed (such as from a redirect), even |
917 // though this may not cause the page id to be incremented. | 919 // though this may not cause the page id to be incremented. |
918 if (page_id < last_indexed_page_id_) | 920 if (page_id < last_indexed_page_id_) |
919 return; | 921 return; |
920 | 922 |
921 bool same_page_id = last_indexed_page_id_ == page_id; | 923 bool same_page_id = last_indexed_page_id_ == page_id; |
922 if (!preliminary_capture) | 924 if (!preliminary_capture) |
923 last_indexed_page_id_ = page_id; | 925 last_indexed_page_id_ = page_id; |
(...skipping 13 matching lines...) Expand all Loading... |
937 // so these will trigger a reindex. | 939 // so these will trigger a reindex. |
938 GURL stripped_url(StripRef(url)); | 940 GURL stripped_url(StripRef(url)); |
939 if (same_page_id && stripped_url == last_indexed_url_) | 941 if (same_page_id && stripped_url == last_indexed_url_) |
940 return; | 942 return; |
941 | 943 |
942 if (!preliminary_capture) | 944 if (!preliminary_capture) |
943 last_indexed_url_ = stripped_url; | 945 last_indexed_url_ = stripped_url; |
944 | 946 |
945 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); | 947 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); |
946 | 948 |
947 if (contents.size()) { | |
948 // Send the text to the browser for indexing (the browser might decide not | |
949 // to index, if the URL is HTTPS for instance). | |
950 Send(new ChromeViewHostMsg_PageContents(routing_id(), url, contents)); | |
951 } | |
952 | |
953 #if defined(FULL_SAFE_BROWSING) | 949 #if defined(FULL_SAFE_BROWSING) |
954 // Will swap out the string. | 950 // Will swap out the string. |
955 if (phishing_classifier_) | 951 if (phishing_classifier_) |
956 phishing_classifier_->PageCaptured(&contents, preliminary_capture); | 952 phishing_classifier_->PageCaptured(&contents, preliminary_capture); |
957 #endif | 953 #endif |
958 } | 954 } |
959 | 955 |
960 void ChromeRenderViewObserver::CaptureText(WebFrame* frame, | 956 void ChromeRenderViewObserver::CaptureText(WebFrame* frame, |
961 string16* contents) { | 957 string16* contents) { |
962 contents->clear(); | 958 contents->clear(); |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1031 WebElement element = node.to<WebElement>(); | 1027 WebElement element = node.to<WebElement>(); |
1032 if (!element.hasTagName(tag_name)) | 1028 if (!element.hasTagName(tag_name)) |
1033 continue; | 1029 continue; |
1034 WebString value = element.getAttribute(attribute_name); | 1030 WebString value = element.getAttribute(attribute_name); |
1035 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) | 1031 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) |
1036 continue; | 1032 continue; |
1037 return true; | 1033 return true; |
1038 } | 1034 } |
1039 return false; | 1035 return false; |
1040 } | 1036 } |
OLD | NEW |