| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/chrome_render_frame_observer.h" | 5 #include "chrome/renderer/chrome_render_frame_observer.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 | 8 |
| 9 #include <limits> | 9 #include <limits> |
| 10 #include <string> | 10 #include <string> |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 46 #endif | 46 #endif |
| 47 | 47 |
| 48 using blink::WebDataSource; | 48 using blink::WebDataSource; |
| 49 using blink::WebElement; | 49 using blink::WebElement; |
| 50 using blink::WebLocalFrame; | 50 using blink::WebLocalFrame; |
| 51 using blink::WebNode; | 51 using blink::WebNode; |
| 52 using blink::WebString; | 52 using blink::WebString; |
| 53 using content::SSLStatus; | 53 using content::SSLStatus; |
| 54 using content::RenderFrame; | 54 using content::RenderFrame; |
| 55 | 55 |
| 56 // Delay in milliseconds that we'll wait before capturing the page contents. |
| 57 static const int kDelayForCaptureMs = 500; |
| 58 |
| 59 // Typically, we capture the page data once the page is loaded. |
| 60 // Sometimes, the page never finishes loading, preventing the page capture. |
| 61 // To work around this problem, we always perform a capture after the |
| 62 // following delay. |
| 63 static const int kDelayForForcedCaptureMs = 6000; |
| 64 |
| 56 // Maximum number of characters in the document to index. | 65 // Maximum number of characters in the document to index. |
| 57 // Any text beyond this point will be clipped. | 66 // Any text beyond this point will be clipped. |
| 58 static const size_t kMaxIndexChars = 65535; | 67 static const size_t kMaxIndexChars = 65535; |
| 59 | 68 |
| 60 // Constants for UMA statistic collection. | 69 // Constants for UMA statistic collection. |
| 61 static const char kTranslateCaptureText[] = "Translate.CaptureText"; | 70 static const char kTranslateCaptureText[] = "Translate.CaptureText"; |
| 62 | 71 |
| 63 namespace { | 72 namespace { |
| 64 | 73 |
| 65 // If the source image is null or occupies less area than | 74 // If the source image is null or occupies less area than |
| (...skipping 30 matching lines...) Expand all Loading... |
| 96 skia::ImageOperations::RESIZE_GOOD, | 105 skia::ImageOperations::RESIZE_GOOD, |
| 97 static_cast<int>(scaled_size.width()), | 106 static_cast<int>(scaled_size.width()), |
| 98 static_cast<int>(scaled_size.height())); | 107 static_cast<int>(scaled_size.height())); |
| 99 } | 108 } |
| 100 | 109 |
| 101 } // namespace | 110 } // namespace |
| 102 | 111 |
| 103 ChromeRenderFrameObserver::ChromeRenderFrameObserver( | 112 ChromeRenderFrameObserver::ChromeRenderFrameObserver( |
| 104 content::RenderFrame* render_frame) | 113 content::RenderFrame* render_frame) |
| 105 : content::RenderFrameObserver(render_frame), | 114 : content::RenderFrameObserver(render_frame), |
| 115 capture_timer_(false, false), |
| 106 translate_helper_(nullptr), | 116 translate_helper_(nullptr), |
| 107 phishing_classifier_(nullptr) { | 117 phishing_classifier_(nullptr) { |
| 108 // Don't do anything for subframes. | 118 // Don't do anything for subframes. |
| 109 if (!render_frame->IsMainFrame()) | 119 if (!render_frame->IsMainFrame()) |
| 110 return; | 120 return; |
| 111 | 121 |
| 112 const base::CommandLine& command_line = | 122 const base::CommandLine& command_line = |
| 113 *base::CommandLine::ForCurrentProcess(); | 123 *base::CommandLine::ForCurrentProcess(); |
| 114 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) | 124 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) |
| 115 OnSetClientSidePhishingDetection(true); | 125 OnSetClientSidePhishingDetection(true); |
| (...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 290 if (frame->parent()) | 300 if (frame->parent()) |
| 291 return; | 301 return; |
| 292 | 302 |
| 293 GURL osdd_url = frame->document().openSearchDescriptionURL(); | 303 GURL osdd_url = frame->document().openSearchDescriptionURL(); |
| 294 if (!osdd_url.is_empty()) { | 304 if (!osdd_url.is_empty()) { |
| 295 Send(new ChromeViewHostMsg_PageHasOSDD( | 305 Send(new ChromeViewHostMsg_PageHasOSDD( |
| 296 routing_id(), frame->document().url(), osdd_url, | 306 routing_id(), frame->document().url(), osdd_url, |
| 297 search_provider::AUTODETECTED_PROVIDER)); | 307 search_provider::AUTODETECTED_PROVIDER)); |
| 298 } | 308 } |
| 299 | 309 |
| 300 // TODO(dglazkov): This is only necessary for ChromeRenderViewTests, | 310 // Don't capture pages that have a pending redirect or location change. |
| 301 // since they don't actually pump frames. These tests will need | 311 if (frame->isNavigationScheduled()) |
| 302 // to be rewritten eventually (there is no ChromeRenderView anymore). | 312 return; |
| 303 if (render_frame()->GetRenderView()->GetContentStateImmediately()) { | 313 |
| 304 CapturePageText(PRELIMINARY_CAPTURE); | 314 CapturePageTextLater( |
| 305 } | 315 FINAL_CAPTURE, |
| 316 base::TimeDelta::FromMilliseconds( |
| 317 render_frame()->GetRenderView()->GetContentStateImmediately() |
| 318 ? 0 |
| 319 : kDelayForCaptureMs)); |
| 306 } | 320 } |
| 307 | 321 |
| 308 void ChromeRenderFrameObserver::DidStartProvisionalLoad() { | 322 void ChromeRenderFrameObserver::DidStartProvisionalLoad() { |
| 309 // Let translate_helper do any preparatory work for loading a URL. | 323 // Let translate_helper do any preparatory work for loading a URL. |
| 310 if (!translate_helper_) | 324 if (!translate_helper_) |
| 311 return; | 325 return; |
| 312 | 326 |
| 313 translate_helper_->PrepareForUrl( | 327 translate_helper_->PrepareForUrl( |
| 314 render_frame()->GetWebFrame()->document().url()); | 328 render_frame()->GetWebFrame()->document().url()); |
| 315 } | 329 } |
| 316 | 330 |
| 317 void ChromeRenderFrameObserver::DidCommitProvisionalLoad( | 331 void ChromeRenderFrameObserver::DidCommitProvisionalLoad( |
| 318 bool is_new_navigation, | 332 bool is_new_navigation, |
| 319 bool is_same_page_navigation) { | 333 bool is_same_page_navigation) { |
| 320 WebLocalFrame* frame = render_frame()->GetWebFrame(); | 334 WebLocalFrame* frame = render_frame()->GetWebFrame(); |
| 321 | 335 |
| 322 // Don't do anything for subframes. | 336 // Don't do anything for subframes. |
| 323 if (frame->parent()) | 337 if (frame->parent()) |
| 324 return; | 338 return; |
| 325 | 339 |
| 340 // Don't capture pages that are not new navigations, or that have a pending |
| 341 // redirect or location change. |
| 342 if (!is_new_navigation || frame->isNavigationScheduled()) |
| 343 return; |
| 344 |
| 326 base::debug::SetCrashKeyValue( | 345 base::debug::SetCrashKeyValue( |
| 327 crash_keys::kViewCount, | 346 crash_keys::kViewCount, |
| 328 base::SizeTToString(content::RenderView::GetRenderViewCount())); | 347 base::SizeTToString(content::RenderView::GetRenderViewCount())); |
| 348 |
| 349 CapturePageTextLater(PRELIMINARY_CAPTURE, base::TimeDelta::FromMilliseconds( |
| 350 kDelayForForcedCaptureMs)); |
| 329 } | 351 } |
| 330 | 352 |
| 331 void ChromeRenderFrameObserver::CapturePageText(TextCaptureType capture_type) { | 353 void ChromeRenderFrameObserver::CapturePageText(TextCaptureType capture_type) { |
| 332 WebLocalFrame* frame = render_frame()->GetWebFrame(); | 354 WebLocalFrame* frame = render_frame()->GetWebFrame(); |
| 333 if (!frame) | 355 if (!frame) |
| 334 return; | 356 return; |
| 335 | 357 |
| 336 // Don't capture pages that have pending redirect or location change. | |
| 337 if (frame->isNavigationScheduled()) | |
| 338 return; | |
| 339 | |
| 340 // Don't index/capture pages that are in view source mode. | 358 // Don't index/capture pages that are in view source mode. |
| 341 if (frame->isViewSourceModeEnabled()) | 359 if (frame->isViewSourceModeEnabled()) |
| 342 return; | 360 return; |
| 343 | 361 |
| 344 // Don't capture text of the error pages. | 362 // Don't capture text of the error pages. |
| 345 WebDataSource* ds = frame->dataSource(); | 363 WebDataSource* ds = frame->dataSource(); |
| 346 if (ds && ds->hasUnreachableURL()) | 364 if (ds && ds->hasUnreachableURL()) |
| 347 return; | 365 return; |
| 348 | 366 |
| 349 // Don't index/capture pages that are being prerendered. | 367 // Don't index/capture pages that are being prerendered. |
| 350 if (prerender::PrerenderHelper::IsPrerendering(render_frame())) | 368 if (prerender::PrerenderHelper::IsPrerendering(render_frame())) |
| 351 return; | 369 return; |
| 352 | 370 |
| 353 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); | 371 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); |
| 354 | 372 |
| 355 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the | 373 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the |
| 356 // translate helper for language detection and possible translation. | 374 // translate helper for language detection and possible translation. |
| 357 base::string16 contents = frame->contentAsText(kMaxIndexChars); | 375 base::string16 contents = frame->contentAsText(kMaxIndexChars); |
| 358 | 376 |
| 359 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, | 377 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, |
| 360 base::TimeTicks::Now() - capture_begin_time); | 378 base::TimeTicks::Now() - capture_begin_time); |
| 361 | 379 |
| 362 // We should run language detection only once. Parsing finishes before | 380 if (translate_helper_) |
| 363 // the page loads, so let's pick that timing. | |
| 364 if (translate_helper_ && capture_type == PRELIMINARY_CAPTURE) | |
| 365 translate_helper_->PageCaptured(contents); | 381 translate_helper_->PageCaptured(contents); |
| 366 | 382 |
| 367 TRACE_EVENT0("renderer", "ChromeRenderFrameObserver::CapturePageText"); | 383 TRACE_EVENT0("renderer", "ChromeRenderFrameObserver::CapturePageText"); |
| 368 | 384 |
| 369 #if defined(SAFE_BROWSING_CSD) | 385 #if defined(SAFE_BROWSING_CSD) |
| 370 // Will swap out the string. | 386 // Will swap out the string. |
| 371 if (phishing_classifier_) | 387 if (phishing_classifier_) |
| 372 phishing_classifier_->PageCaptured(&contents, | 388 phishing_classifier_->PageCaptured(&contents, |
| 373 capture_type == PRELIMINARY_CAPTURE); | 389 capture_type == PRELIMINARY_CAPTURE); |
| 374 #endif | 390 #endif |
| 375 } | 391 } |
| 376 | 392 |
| 377 void ChromeRenderFrameObserver::DidMeaningfulLayout( | 393 void ChromeRenderFrameObserver::CapturePageTextLater( |
| 378 blink::WebMeaningfulLayout layout_type) { | 394 TextCaptureType capture_type, |
| 379 switch (layout_type) { | 395 base::TimeDelta delay) { |
| 380 case blink::WebMeaningfulLayout::FinishedParsing: | 396 capture_timer_.Start(FROM_HERE, delay, |
| 381 CapturePageText(PRELIMINARY_CAPTURE); | 397 base::Bind(&ChromeRenderFrameObserver::CapturePageText, |
| 382 break; | 398 base::Unretained(this), capture_type)); |
| 383 case blink::WebMeaningfulLayout::FinishedLoading: | |
| 384 CapturePageText(FINAL_CAPTURE); | |
| 385 break; | |
| 386 default: | |
| 387 break; | |
| 388 } | |
| 389 } | 399 } |
| OLD | NEW |