OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/chrome_render_view_observer.h" | 5 #include "chrome/renderer/chrome_render_view_observer.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/bind_helpers.h" | 8 #include "base/bind_helpers.h" |
9 #include "base/command_line.h" | 9 #include "base/command_line.h" |
10 #include "base/debug/trace_event.h" | 10 #include "base/debug/trace_event.h" |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
89 | 89 |
90 // Maximum number of characters in the document to index; any text beyond this | 90 // Maximum number of characters in the document to index; any text beyond this |
91 // point will be clipped. | 91 // point will be clipped. |
92 static const size_t kMaxIndexChars = 65535; | 92 static const size_t kMaxIndexChars = 65535; |
93 | 93 |
94 // Constants for UMA statistic collection. | 94 // Constants for UMA statistic collection. |
95 static const char kTranslateCaptureText[] = "Translate.CaptureText"; | 95 static const char kTranslateCaptureText[] = "Translate.CaptureText"; |
96 | 96 |
97 namespace { | 97 namespace { |
98 | 98 |
99 GURL StripRef(const GURL& url) { | |
100 GURL::Replacements replacements; | |
101 replacements.ClearRef(); | |
102 return url.ReplaceComponents(replacements); | |
103 } | |
104 | |
105 #if defined(OS_ANDROID) | 99 #if defined(OS_ANDROID) |
106 // Parses the DOM for a <meta> tag with a particular name. | 100 // Parses the DOM for a <meta> tag with a particular name. |
107 // |meta_tag_content| is set to the contents of the 'content' attribute. | 101 // |meta_tag_content| is set to the contents of the 'content' attribute. |
108 // |found_tag| is set to true if the tag was successfully found. | 102 // |found_tag| is set to true if the tag was successfully found. |
109 // Returns true if the document was parsed without errors. | 103 // Returns true if the document was parsed without errors. |
110 bool RetrieveMetaTagContent(const WebFrame* main_frame, | 104 bool RetrieveMetaTagContent(const WebFrame* main_frame, |
111 const GURL& expected_url, | 105 const GURL& expected_url, |
112 const std::string& meta_tag_name, | 106 const std::string& meta_tag_name, |
113 bool* found_tag, | 107 bool* found_tag, |
114 std::string* meta_tag_content) { | 108 std::string* meta_tag_content) { |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
154 | 148 |
155 } // namespace | 149 } // namespace |
156 | 150 |
157 ChromeRenderViewObserver::ChromeRenderViewObserver( | 151 ChromeRenderViewObserver::ChromeRenderViewObserver( |
158 content::RenderView* render_view, | 152 content::RenderView* render_view, |
159 ChromeRenderProcessObserver* chrome_render_process_observer) | 153 ChromeRenderProcessObserver* chrome_render_process_observer) |
160 : content::RenderViewObserver(render_view), | 154 : content::RenderViewObserver(render_view), |
161 chrome_render_process_observer_(chrome_render_process_observer), | 155 chrome_render_process_observer_(chrome_render_process_observer), |
162 translate_helper_(new TranslateHelper(render_view)), | 156 translate_helper_(new TranslateHelper(render_view)), |
163 phishing_classifier_(NULL), | 157 phishing_classifier_(NULL), |
164 last_indexed_page_id_(-1), | |
165 capture_timer_(false, false) { | 158 capture_timer_(false, false) { |
166 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); | 159 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); |
167 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) | 160 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) |
168 OnSetClientSidePhishingDetection(true); | 161 OnSetClientSidePhishingDetection(true); |
169 } | 162 } |
170 | 163 |
171 ChromeRenderViewObserver::~ChromeRenderViewObserver() { | 164 ChromeRenderViewObserver::~ChromeRenderViewObserver() { |
172 } | 165 } |
173 | 166 |
174 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) { | 167 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) { |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
346 Send(new ChromeViewHostMsg_PageHasOSDD( | 339 Send(new ChromeViewHostMsg_PageHasOSDD( |
347 routing_id(), main_frame->document().url(), osdd_url, | 340 routing_id(), main_frame->document().url(), osdd_url, |
348 search_provider::AUTODETECTED_PROVIDER)); | 341 search_provider::AUTODETECTED_PROVIDER)); |
349 } | 342 } |
350 | 343 |
351 // Don't capture pages that include a refresh meta tag. | 344 // Don't capture pages that include a refresh meta tag. |
352 if (HasRefreshMetaTag(main_frame)) | 345 if (HasRefreshMetaTag(main_frame)) |
353 return; | 346 return; |
354 | 347 |
355 CapturePageInfoLater( | 348 CapturePageInfoLater( |
356 render_view()->GetPageId(), | |
357 false, // preliminary_capture | 349 false, // preliminary_capture |
358 base::TimeDelta::FromMilliseconds( | 350 base::TimeDelta::FromMilliseconds( |
359 render_view()->GetContentStateImmediately() ? | 351 render_view()->GetContentStateImmediately() ? |
360 0 : kDelayForCaptureMs)); | 352 0 : kDelayForCaptureMs)); |
361 } | 353 } |
362 | 354 |
363 void ChromeRenderViewObserver::DidCommitProvisionalLoad( | 355 void ChromeRenderViewObserver::DidCommitProvisionalLoad( |
364 WebLocalFrame* frame, bool is_new_navigation) { | 356 WebLocalFrame* frame, bool is_new_navigation) { |
365 // Don't capture on non-new navigations or pages with a refresh meta tag. | 357 // Don't capture on non-new navigations or pages with a refresh meta tag. |
366 if (!is_new_navigation || HasRefreshMetaTag(frame)) | 358 if (!is_new_navigation || HasRefreshMetaTag(frame)) |
367 return; | 359 return; |
368 | 360 |
369 CapturePageInfoLater( | 361 CapturePageInfoLater( |
370 render_view()->GetPageId(), | |
371 true, // preliminary_capture | 362 true, // preliminary_capture |
372 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs)); | 363 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs)); |
373 } | 364 } |
374 | 365 |
375 void ChromeRenderViewObserver::CapturePageInfoLater(int page_id, | 366 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture, |
376 bool preliminary_capture, | |
377 base::TimeDelta delay) { | 367 base::TimeDelta delay) { |
378 capture_timer_.Start( | 368 capture_timer_.Start( |
379 FROM_HERE, | 369 FROM_HERE, |
380 delay, | 370 delay, |
381 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, | 371 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, |
382 base::Unretained(this), | 372 base::Unretained(this), |
383 page_id, | |
384 preliminary_capture)); | 373 preliminary_capture)); |
385 } | 374 } |
386 | 375 |
387 void ChromeRenderViewObserver::CapturePageInfo(int page_id, | 376 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) { |
388 bool preliminary_capture) { | |
389 // If |page_id| is obsolete, we should stop indexing and capturing a page. | |
390 if (render_view()->GetPageId() != page_id) | |
awong
2014/07/07 18:36:59
This looks like a guard against the RenderView loa
Avi (use Gerrit)
2014/07/07 18:49:46
I'm not convinced this check for page id is even n
Lei Zhang
2014/07/07 19:37:04
Can the page id change if |is_new_navigation| in D
Avi (use Gerrit)
2014/07/07 19:51:22
I don't know. I can reset the timer no matter what
| |
391 return; | |
392 | |
393 if (!render_view()->GetWebView()) | 377 if (!render_view()->GetWebView()) |
394 return; | 378 return; |
395 | 379 |
396 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); | 380 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); |
397 if (!main_frame) | 381 if (!main_frame) |
398 return; | 382 return; |
399 | 383 |
400 // Don't index/capture pages that are in view source mode. | 384 // Don't index/capture pages that are in view source mode. |
401 if (main_frame->isViewSourceModeEnabled()) | 385 if (main_frame->isViewSourceModeEnabled()) |
402 return; | 386 return; |
(...skipping 13 matching lines...) Expand all Loading... | |
416 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the | 400 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the |
417 // translate helper for language detection and possible translation. | 401 // translate helper for language detection and possible translation. |
418 base::string16 contents; | 402 base::string16 contents; |
419 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); | 403 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); |
420 CaptureText(main_frame, &contents); | 404 CaptureText(main_frame, &contents); |
421 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, | 405 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, |
422 base::TimeTicks::Now() - capture_begin_time); | 406 base::TimeTicks::Now() - capture_begin_time); |
423 if (translate_helper_) | 407 if (translate_helper_) |
424 translate_helper_->PageCaptured(contents); | 408 translate_helper_->PageCaptured(contents); |
425 | 409 |
426 // TODO(shess): Is indexing "Full text search" indexing? In that | |
427 // case more of this can go. | |
428 // Skip indexing if this is not a new load. Note that the case where | |
429 // page_id == last_indexed_page_id_ is more complicated, since we need to | |
430 // reindex if the toplevel URL has changed (such as from a redirect), even | |
431 // though this may not cause the page id to be incremented. | |
432 if (page_id < last_indexed_page_id_) | |
433 return; | |
434 | |
435 bool same_page_id = last_indexed_page_id_ == page_id; | |
436 if (!preliminary_capture) | |
437 last_indexed_page_id_ = page_id; | |
438 | |
439 // Get the URL for this page. | |
440 GURL url(main_frame->document().url()); | |
441 if (url.is_empty()) { | |
442 if (!preliminary_capture) | |
443 last_indexed_url_ = GURL(); | |
444 return; | |
445 } | |
446 | |
447 // If the page id is unchanged, check whether the URL (ignoring fragments) | |
448 // has changed. If so, we need to reindex. Otherwise, assume this is a | |
449 // reload, in-page navigation, or some other load type where we don't want to | |
450 // reindex. Note: subframe navigations after onload increment the page id, | |
451 // so these will trigger a reindex. | |
452 GURL stripped_url(StripRef(url)); | |
awong
2014/07/07 18:36:59
In this observer, the page_id seems to be used as
Avi (use Gerrit)
2014/07/07 18:49:46
The classifier does check for PAGE_TRANSITION_FORW
Lei Zhang
2014/07/07 19:37:04
The PAGE_TRANSITION_FORWARD_BACK was added in r833
Avi (use Gerrit)
2014/07/07 19:51:22
So the phishing classifier runs an extra time if t
| |
453 if (same_page_id && stripped_url == last_indexed_url_) | |
454 return; | |
455 | |
456 if (!preliminary_capture) | |
457 last_indexed_url_ = stripped_url; | |
458 | |
459 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); | 410 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); |
460 | 411 |
461 #if defined(FULL_SAFE_BROWSING) | 412 #if defined(FULL_SAFE_BROWSING) |
462 // Will swap out the string. | 413 // Will swap out the string. |
463 if (phishing_classifier_) | 414 if (phishing_classifier_) |
464 phishing_classifier_->PageCaptured(&contents, preliminary_capture); | 415 phishing_classifier_->PageCaptured(&contents, preliminary_capture); |
465 #endif | 416 #endif |
466 } | 417 } |
467 | 418 |
468 void ChromeRenderViewObserver::CaptureText(WebFrame* frame, | 419 void ChromeRenderViewObserver::CaptureText(WebFrame* frame, |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
514 WebElement element = node.to<WebElement>(); | 465 WebElement element = node.to<WebElement>(); |
515 if (!element.hasHTMLTagName(tag_name)) | 466 if (!element.hasHTMLTagName(tag_name)) |
516 continue; | 467 continue; |
517 WebString value = element.getAttribute(attribute_name); | 468 WebString value = element.getAttribute(attribute_name); |
518 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) | 469 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) |
519 continue; | 470 continue; |
520 return true; | 471 return true; |
521 } | 472 } |
522 return false; | 473 return false; |
523 } | 474 } |
OLD | NEW |