Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/chrome_render_view_observer.h" | 5 #include "chrome/renderer/chrome_render_view_observer.h" |
| 6 | 6 |
| 7 #include "base/bind.h" | 7 #include "base/bind.h" |
| 8 #include "base/bind_helpers.h" | 8 #include "base/bind_helpers.h" |
| 9 #include "base/command_line.h" | 9 #include "base/command_line.h" |
| 10 #include "base/debug/trace_event.h" | 10 #include "base/debug/trace_event.h" |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 89 | 89 |
| 90 // maximum number of characters in the document to index, any text beyond this | 90 // maximum number of characters in the document to index, any text beyond this |
| 91 // point will be clipped | 91 // point will be clipped |
| 92 static const size_t kMaxIndexChars = 65535; | 92 static const size_t kMaxIndexChars = 65535; |
| 93 | 93 |
| 94 // Constants for UMA statistic collection. | 94 // Constants for UMA statistic collection. |
| 95 static const char kTranslateCaptureText[] = "Translate.CaptureText"; | 95 static const char kTranslateCaptureText[] = "Translate.CaptureText"; |
| 96 | 96 |
| 97 namespace { | 97 namespace { |
| 98 | 98 |
| 99 GURL StripRef(const GURL& url) { | |
| 100 GURL::Replacements replacements; | |
| 101 replacements.ClearRef(); | |
| 102 return url.ReplaceComponents(replacements); | |
| 103 } | |
| 104 | |
| 105 #if defined(OS_ANDROID) | 99 #if defined(OS_ANDROID) |
| 106 // Parses the DOM for a <meta> tag with a particular name. | 100 // Parses the DOM for a <meta> tag with a particular name. |
| 107 // |meta_tag_content| is set to the contents of the 'content' attribute. | 101 // |meta_tag_content| is set to the contents of the 'content' attribute. |
| 108 // |found_tag| is set to true if the tag was successfully found. | 102 // |found_tag| is set to true if the tag was successfully found. |
| 109 // Returns true if the document was parsed without errors. | 103 // Returns true if the document was parsed without errors. |
| 110 bool RetrieveMetaTagContent(const WebFrame* main_frame, | 104 bool RetrieveMetaTagContent(const WebFrame* main_frame, |
| 111 const GURL& expected_url, | 105 const GURL& expected_url, |
| 112 const std::string& meta_tag_name, | 106 const std::string& meta_tag_name, |
| 113 bool* found_tag, | 107 bool* found_tag, |
| 114 std::string* meta_tag_content) { | 108 std::string* meta_tag_content) { |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 154 | 148 |
| 155 } // namespace | 149 } // namespace |
| 156 | 150 |
| 157 ChromeRenderViewObserver::ChromeRenderViewObserver( | 151 ChromeRenderViewObserver::ChromeRenderViewObserver( |
| 158 content::RenderView* render_view, | 152 content::RenderView* render_view, |
| 159 ChromeRenderProcessObserver* chrome_render_process_observer) | 153 ChromeRenderProcessObserver* chrome_render_process_observer) |
| 160 : content::RenderViewObserver(render_view), | 154 : content::RenderViewObserver(render_view), |
| 161 chrome_render_process_observer_(chrome_render_process_observer), | 155 chrome_render_process_observer_(chrome_render_process_observer), |
| 162 translate_helper_(new TranslateHelper(render_view)), | 156 translate_helper_(new TranslateHelper(render_view)), |
| 163 phishing_classifier_(NULL), | 157 phishing_classifier_(NULL), |
| 164 last_indexed_page_id_(-1), | |
| 165 capture_timer_(false, false) { | 158 capture_timer_(false, false) { |
| 166 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); | 159 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); |
| 167 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) | 160 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) |
| 168 OnSetClientSidePhishingDetection(true); | 161 OnSetClientSidePhishingDetection(true); |
| 169 } | 162 } |
| 170 | 163 |
| 171 ChromeRenderViewObserver::~ChromeRenderViewObserver() { | 164 ChromeRenderViewObserver::~ChromeRenderViewObserver() { |
| 172 } | 165 } |
| 173 | 166 |
| 174 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) { | 167 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) { |
| (...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 346 Send(new ChromeViewHostMsg_PageHasOSDD( | 339 Send(new ChromeViewHostMsg_PageHasOSDD( |
| 347 routing_id(), main_frame->document().url(), osdd_url, | 340 routing_id(), main_frame->document().url(), osdd_url, |
| 348 search_provider::AUTODETECTED_PROVIDER)); | 341 search_provider::AUTODETECTED_PROVIDER)); |
| 349 } | 342 } |
| 350 | 343 |
| 351 // Don't capture pages including refresh meta tag. | 344 // Don't capture pages including refresh meta tag. |
| 352 if (HasRefreshMetaTag(main_frame)) | 345 if (HasRefreshMetaTag(main_frame)) |
| 353 return; | 346 return; |
| 354 | 347 |
| 355 CapturePageInfoLater( | 348 CapturePageInfoLater( |
| 356 render_view()->GetPageId(), | |
| 357 false, // preliminary_capture | 349 false, // preliminary_capture |
| 358 base::TimeDelta::FromMilliseconds( | 350 base::TimeDelta::FromMilliseconds( |
| 359 render_view()->GetContentStateImmediately() ? | 351 render_view()->GetContentStateImmediately() ? |
| 360 0 : kDelayForCaptureMs)); | 352 0 : kDelayForCaptureMs)); |
| 361 } | 353 } |
| 362 | 354 |
| 363 void ChromeRenderViewObserver::DidCommitProvisionalLoad( | 355 void ChromeRenderViewObserver::DidCommitProvisionalLoad( |
| 364 WebLocalFrame* frame, bool is_new_navigation) { | 356 WebLocalFrame* frame, bool is_new_navigation) { |
| 365 // Don't capture pages being not new, or including refresh meta tag. | 357 // Don't capture pages being not new, or including refresh meta tag. |
| 366 if (!is_new_navigation || HasRefreshMetaTag(frame)) | 358 if (!is_new_navigation || HasRefreshMetaTag(frame)) |
| 367 return; | 359 return; |
| 368 | 360 |
| 369 CapturePageInfoLater( | 361 CapturePageInfoLater( |
| 370 render_view()->GetPageId(), | |
| 371 true, // preliminary_capture | 362 true, // preliminary_capture |
| 372 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs)); | 363 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs)); |
| 373 } | 364 } |
| 374 | 365 |
| 375 void ChromeRenderViewObserver::CapturePageInfoLater(int page_id, | 366 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture, |
| 376 bool preliminary_capture, | |
| 377 base::TimeDelta delay) { | 367 base::TimeDelta delay) { |
| 378 capture_timer_.Start( | 368 capture_timer_.Start( |
| 379 FROM_HERE, | 369 FROM_HERE, |
| 380 delay, | 370 delay, |
| 381 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, | 371 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, |
| 382 base::Unretained(this), | 372 base::Unretained(this), |
| 383 page_id, | |
| 384 preliminary_capture)); | 373 preliminary_capture)); |
| 385 } | 374 } |
| 386 | 375 |
| 387 void ChromeRenderViewObserver::CapturePageInfo(int page_id, | 376 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) { |
| 388 bool preliminary_capture) { | |
| 389 // If |page_id| is obsolete, we should stop indexing and capturing a page. | |
| 390 if (render_view()->GetPageId() != page_id) | |
|
awong
2014/07/07 18:36:59
This looks like a guard against the RenderView loa
Avi (use Gerrit)
2014/07/07 18:49:46
I'm not convinced this check for page id is even n
Lei Zhang
2014/07/07 19:37:04
Can the page id change if |is_new_navigation| in D
Avi (use Gerrit)
2014/07/07 19:51:22
I don't know. I can reset the timer no matter what
| |
| 391 return; | |
| 392 | |
| 393 if (!render_view()->GetWebView()) | 377 if (!render_view()->GetWebView()) |
| 394 return; | 378 return; |
| 395 | 379 |
| 396 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); | 380 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); |
| 397 if (!main_frame) | 381 if (!main_frame) |
| 398 return; | 382 return; |
| 399 | 383 |
| 400 // Don't index/capture pages that are in view source mode. | 384 // Don't index/capture pages that are in view source mode. |
| 401 if (main_frame->isViewSourceModeEnabled()) | 385 if (main_frame->isViewSourceModeEnabled()) |
| 402 return; | 386 return; |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 416 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the | 400 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the |
| 417 // translate helper for language detection and possible translation. | 401 // translate helper for language detection and possible translation. |
| 418 base::string16 contents; | 402 base::string16 contents; |
| 419 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); | 403 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); |
| 420 CaptureText(main_frame, &contents); | 404 CaptureText(main_frame, &contents); |
| 421 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, | 405 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, |
| 422 base::TimeTicks::Now() - capture_begin_time); | 406 base::TimeTicks::Now() - capture_begin_time); |
| 423 if (translate_helper_) | 407 if (translate_helper_) |
| 424 translate_helper_->PageCaptured(contents); | 408 translate_helper_->PageCaptured(contents); |
| 425 | 409 |
| 426 // TODO(shess): Is indexing "Full text search" indexing? In that | |
| 427 // case more of this can go. | |
| 428 // Skip indexing if this is not a new load. Note that the case where | |
| 429 // page_id == last_indexed_page_id_ is more complicated, since we need to | |
| 430 // reindex if the toplevel URL has changed (such as from a redirect), even | |
| 431 // though this may not cause the page id to be incremented. | |
| 432 if (page_id < last_indexed_page_id_) | |
| 433 return; | |
| 434 | |
| 435 bool same_page_id = last_indexed_page_id_ == page_id; | |
| 436 if (!preliminary_capture) | |
| 437 last_indexed_page_id_ = page_id; | |
| 438 | |
| 439 // Get the URL for this page. | |
| 440 GURL url(main_frame->document().url()); | |
| 441 if (url.is_empty()) { | |
| 442 if (!preliminary_capture) | |
| 443 last_indexed_url_ = GURL(); | |
| 444 return; | |
| 445 } | |
| 446 | |
| 447 // If the page id is unchanged, check whether the URL (ignoring fragments) | |
| 448 // has changed. If so, we need to reindex. Otherwise, assume this is a | |
| 449 // reload, in-page navigation, or some other load type where we don't want to | |
| 450 // reindex. Note: subframe navigations after onload increment the page id, | |
| 451 // so these will trigger a reindex. | |
| 452 GURL stripped_url(StripRef(url)); | |
|
awong
2014/07/07 18:36:59
In this observer, the page_id seems to be used as
Avi (use Gerrit)
2014/07/07 18:49:46
The classifier does check for PAGE_TRANSITION_FORW
Lei Zhang
2014/07/07 19:37:04
The PAGE_TRANSITION_FORWARD_BACK was added in r833
Avi (use Gerrit)
2014/07/07 19:51:22
So the phishing classifier runs an extra time if t
| |
| 453 if (same_page_id && stripped_url == last_indexed_url_) | |
| 454 return; | |
| 455 | |
| 456 if (!preliminary_capture) | |
| 457 last_indexed_url_ = stripped_url; | |
| 458 | |
| 459 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); | 410 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); |
| 460 | 411 |
| 461 #if defined(FULL_SAFE_BROWSING) | 412 #if defined(FULL_SAFE_BROWSING) |
| 462 // Will swap out the string. | 413 // Will swap out the string. |
| 463 if (phishing_classifier_) | 414 if (phishing_classifier_) |
| 464 phishing_classifier_->PageCaptured(&contents, preliminary_capture); | 415 phishing_classifier_->PageCaptured(&contents, preliminary_capture); |
| 465 #endif | 416 #endif |
| 466 } | 417 } |
| 467 | 418 |
| 468 void ChromeRenderViewObserver::CaptureText(WebFrame* frame, | 419 void ChromeRenderViewObserver::CaptureText(WebFrame* frame, |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 514 WebElement element = node.to<WebElement>(); | 465 WebElement element = node.to<WebElement>(); |
| 515 if (!element.hasHTMLTagName(tag_name)) | 466 if (!element.hasHTMLTagName(tag_name)) |
| 516 continue; | 467 continue; |
| 517 WebString value = element.getAttribute(attribute_name); | 468 WebString value = element.getAttribute(attribute_name); |
| 518 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) | 469 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) |
| 519 continue; | 470 continue; |
| 520 return true; | 471 return true; |
| 521 } | 472 } |
| 522 return false; | 473 return false; |
| 523 } | 474 } |
| OLD | NEW |