Index: chrome/renderer/render_view.cc |
=================================================================== |
--- chrome/renderer/render_view.cc (revision 35751) |
+++ chrome/renderer/render_view.cc (working copy) |
@@ -261,6 +261,7 @@ |
send_preferred_size_changes_(false), |
ALLOW_THIS_IN_INITIALIZER_LIST( |
notification_provider_(new NotificationProvider(this))), |
+ determine_page_text_after_loading_stops_(false), |
view_type_(ViewType::INVALID), |
browser_window_id_(-1), |
last_top_level_navigation_page_id_(-1), |
@@ -445,6 +446,7 @@ |
IPC_MESSAGE_HANDLER(ViewMsg_CopyImageAt, OnCopyImageAt) |
IPC_MESSAGE_HANDLER(ViewMsg_ExecuteEditCommand, OnExecuteEditCommand) |
IPC_MESSAGE_HANDLER(ViewMsg_Find, OnFind) |
+ IPC_MESSAGE_HANDLER(ViewMsg_DeterminePageText, OnDeterminePageText) |
IPC_MESSAGE_HANDLER(ViewMsg_Zoom, OnZoom) |
IPC_MESSAGE_HANDLER(ViewMsg_SetZoomLevelForLoadingHost, |
OnSetZoomLevelForLoadingHost) |
@@ -596,20 +598,25 @@ |
if (!preliminary_capture) |
last_indexed_page_id_ = load_id; |
- // Get the URL for this page. |
+ // get the URL for this page |
GURL url(main_frame->url()); |
if (url.is_empty()) |
return; |
- // Retrieve the frame's full text. |
+ // full text |
std::wstring contents; |
CaptureText(main_frame, &contents); |
if (contents.size()) { |
- // Send the text to the browser for indexing (the browser might decide not |
- // to index, if the URL is HTTPS for instance) and language discovery. |
- Send(new ViewHostMsg_PageContents(routing_id_, url, load_id, contents)); |
+ // Send the text to the browser for indexing. |
+ Send(new ViewHostMsg_PageContents(url, load_id, contents)); |
} |
+ // Send over text content of this page to the browser. |
+ if (determine_page_text_after_loading_stops_) { |
+ determine_page_text_after_loading_stops_ = false; |
+ Send(new ViewMsg_DeterminePageText_Reply(routing_id_, contents)); |
+ } |
+ |
// thumbnail |
SendThumbnail(); |
} |
@@ -619,6 +626,15 @@ |
if (!frame) |
return; |
+ // Don't index any https pages. People generally don't want their bank |
+ // accounts, etc. indexed on their computer, especially since some of these |
+ // things are not marked cachable. |
+ // TODO(brettw) we may want to consider more elaborate heuristics such as |
+ // the cachability of the page. We may also want to consider subframes (this |
+ // test will still index subframes if the subframe is SSL). |
+ if (GURL(frame->url()).SchemeIsSecure()) |
+ return; |
+ |
#ifdef TIME_TEXT_RETRIEVAL |
double begin = time_util::GetHighResolutionTimeNow(); |
#endif |
@@ -2966,6 +2982,23 @@ |
} |
} |
+void RenderView::OnDeterminePageText() { |
+ if (!is_loading_) { |
+ if (!webview()) |
+ return; |
+ WebFrame* main_frame = webview()->mainFrame(); |
+ std::wstring contents; |
+ CaptureText(main_frame, &contents); |
+ Send(new ViewMsg_DeterminePageText_Reply(routing_id_, contents)); |
+ determine_page_text_after_loading_stops_ = false; |
+ return; |
+ } |
+ |
+ // We set |determine_page_text_after_loading_stops_| true here so that, |
+ // after page has been loaded completely, the text in the page is captured. |
+ determine_page_text_after_loading_stops_ = true; |
+} |
+ |
void RenderView::DnsPrefetch(const std::vector<std::string>& host_names) { |
Send(new ViewHostMsg_DnsPrefetch(host_names)); |
} |