| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/chrome_render_frame_observer.h" | 5 #include "chrome/renderer/chrome_render_frame_observer.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 | 8 |
| 9 #include <limits> | 9 #include <limits> |
| 10 #include <string> | 10 #include <string> |
| (...skipping 35 matching lines...) | (...skipping 35 matching lines...) |
| 46 #endif | 46 #endif |
| 47 | 47 |
| 48 using blink::WebDataSource; | 48 using blink::WebDataSource; |
| 49 using blink::WebElement; | 49 using blink::WebElement; |
| 50 using blink::WebLocalFrame; | 50 using blink::WebLocalFrame; |
| 51 using blink::WebNode; | 51 using blink::WebNode; |
| 52 using blink::WebString; | 52 using blink::WebString; |
| 53 using content::SSLStatus; | 53 using content::SSLStatus; |
| 54 using content::RenderFrame; | 54 using content::RenderFrame; |
| 55 | 55 |
| 56 // Delay in milliseconds that we'll wait before capturing the page contents. | |
| 57 static const int kDelayForCaptureMs = 500; | |
| 58 | |
| 59 // Typically, we capture the page data once the page is loaded. | |
| 60 // Sometimes the page never finishes loading, which prevents the capture. | |
| 61 // To work around this problem, we always perform a capture after the | |
| 62 // following delay. | |
| 63 static const int kDelayForForcedCaptureMs = 6000; | |
| 64 | |
| 65 // Maximum number of characters in the document to index. | 56 // Maximum number of characters in the document to index. |
| 66 // Any text beyond this point will be clipped. | 57 // Any text beyond this point will be clipped. |
| 67 static const size_t kMaxIndexChars = 65535; | 58 static const size_t kMaxIndexChars = 65535; |
| 68 | 59 |
| 69 // Constants for UMA statistic collection. | 60 // Constants for UMA statistic collection. |
| 70 static const char kTranslateCaptureText[] = "Translate.CaptureText"; | 61 static const char kTranslateCaptureText[] = "Translate.CaptureText"; |
| 71 | 62 |
| 72 namespace { | 63 namespace { |
| 73 | 64 |
| 74 // If the source image is null or occupies less area than | 65 // If the source image is null or occupies less area than |
| (...skipping 30 matching lines...) | (...skipping 30 matching lines...) |
| 105 skia::ImageOperations::RESIZE_GOOD, | 96 skia::ImageOperations::RESIZE_GOOD, |
| 106 static_cast<int>(scaled_size.width()), | 97 static_cast<int>(scaled_size.width()), |
| 107 static_cast<int>(scaled_size.height())); | 98 static_cast<int>(scaled_size.height())); |
| 108 } | 99 } |
| 109 | 100 |
| 110 } // namespace | 101 } // namespace |
| 111 | 102 |
| 112 ChromeRenderFrameObserver::ChromeRenderFrameObserver( | 103 ChromeRenderFrameObserver::ChromeRenderFrameObserver( |
| 113 content::RenderFrame* render_frame) | 104 content::RenderFrame* render_frame) |
| 114 : content::RenderFrameObserver(render_frame), | 105 : content::RenderFrameObserver(render_frame), |
| 115 capture_timer_(false, false), | |
| 116 translate_helper_(nullptr), | 106 translate_helper_(nullptr), |
| 117 phishing_classifier_(nullptr) { | 107 phishing_classifier_(nullptr) { |
| 118 // Don't do anything for subframes. | 108 // Don't do anything for subframes. |
| 119 if (!render_frame->IsMainFrame()) | 109 if (!render_frame->IsMainFrame()) |
| 120 return; | 110 return; |
| 121 | 111 |
| 122 const base::CommandLine& command_line = | 112 const base::CommandLine& command_line = |
| 123 *base::CommandLine::ForCurrentProcess(); | 113 *base::CommandLine::ForCurrentProcess(); |
| 124 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) | 114 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) |
| 125 OnSetClientSidePhishingDetection(true); | 115 OnSetClientSidePhishingDetection(true); |
| (...skipping 174 matching lines...) | (...skipping 174 matching lines...) |
| 300 if (frame->parent()) | 290 if (frame->parent()) |
| 301 return; | 291 return; |
| 302 | 292 |
| 303 GURL osdd_url = frame->document().openSearchDescriptionURL(); | 293 GURL osdd_url = frame->document().openSearchDescriptionURL(); |
| 304 if (!osdd_url.is_empty()) { | 294 if (!osdd_url.is_empty()) { |
| 305 Send(new ChromeViewHostMsg_PageHasOSDD( | 295 Send(new ChromeViewHostMsg_PageHasOSDD( |
| 306 routing_id(), frame->document().url(), osdd_url, | 296 routing_id(), frame->document().url(), osdd_url, |
| 307 search_provider::AUTODETECTED_PROVIDER)); | 297 search_provider::AUTODETECTED_PROVIDER)); |
| 308 } | 298 } |
| 309 | 299 |
| 310 // Don't capture pages that have pending redirect or location change. | 300 // TODO(dglazkov): This is only necessary for ChromeRenderViewTests, |
| 311 if (frame->isNavigationScheduled()) | 301 // since they don't actually pump frames. These tests will need |
| 312 return; | 302 // to be rewritten eventually (there is no ChromeRenderView anymore). |
| 313 | 303 if (render_frame()->GetRenderView()->GetContentStateImmediately()) { |
| 314 CapturePageTextLater( | 304 CapturePageText(PRELIMINARY_CAPTURE); |
| 315 FINAL_CAPTURE, | 305 } |
| 316 base::TimeDelta::FromMilliseconds( | |
| 317 render_frame()->GetRenderView()->GetContentStateImmediately() | |
| 318 ? 0 | |
| 319 : kDelayForCaptureMs)); | |
| 320 } | 306 } |
| 321 | 307 |
| 322 void ChromeRenderFrameObserver::DidStartProvisionalLoad() { | 308 void ChromeRenderFrameObserver::DidStartProvisionalLoad() { |
| 323 // Let translate_helper do any preparatory work for loading a URL. | 309 // Let translate_helper do any preparatory work for loading a URL. |
| 324 if (!translate_helper_) | 310 if (!translate_helper_) |
| 325 return; | 311 return; |
| 326 | 312 |
| 327 translate_helper_->PrepareForUrl( | 313 translate_helper_->PrepareForUrl( |
| 328 render_frame()->GetWebFrame()->document().url()); | 314 render_frame()->GetWebFrame()->document().url()); |
| 329 } | 315 } |
| 330 | 316 |
| 331 void ChromeRenderFrameObserver::DidCommitProvisionalLoad( | 317 void ChromeRenderFrameObserver::DidCommitProvisionalLoad( |
| 332 bool is_new_navigation, | 318 bool is_new_navigation, |
| 333 bool is_same_page_navigation) { | 319 bool is_same_page_navigation) { |
| 334 WebLocalFrame* frame = render_frame()->GetWebFrame(); | 320 WebLocalFrame* frame = render_frame()->GetWebFrame(); |
| 335 | 321 |
| 336 // Don't do anything for subframes. | 322 // Don't do anything for subframes. |
| 337 if (frame->parent()) | 323 if (frame->parent()) |
| 338 return; | 324 return; |
| 339 | 325 |
| 340 // Don't capture pages that are not new navigations or that have a | |
| 341 // pending redirect or location change. | |
| 342 if (!is_new_navigation || frame->isNavigationScheduled()) | |
| 343 return; | |
| 344 | |
| 345 base::debug::SetCrashKeyValue( | 326 base::debug::SetCrashKeyValue( |
| 346 crash_keys::kViewCount, | 327 crash_keys::kViewCount, |
| 347 base::SizeTToString(content::RenderView::GetRenderViewCount())); | 328 base::SizeTToString(content::RenderView::GetRenderViewCount())); |
| 348 | |
| 349 CapturePageTextLater(PRELIMINARY_CAPTURE, base::TimeDelta::FromMilliseconds( | |
| 350 kDelayForForcedCaptureMs)); | |
| 351 } | 329 } |
| 352 | 330 |
| 353 void ChromeRenderFrameObserver::CapturePageText(TextCaptureType capture_type) { | 331 void ChromeRenderFrameObserver::CapturePageText(TextCaptureType capture_type) { |
| 354 WebLocalFrame* frame = render_frame()->GetWebFrame(); | 332 WebLocalFrame* frame = render_frame()->GetWebFrame(); |
| 355 if (!frame) | 333 if (!frame) |
| 356 return; | 334 return; |
| 357 | 335 |
| 336 // Don't capture pages that have pending redirect or location change. |
| 337 if (frame->isNavigationScheduled()) |
| 338 return; |
| 339 |
| 358 // Don't index/capture pages that are in view source mode. | 340 // Don't index/capture pages that are in view source mode. |
| 359 if (frame->isViewSourceModeEnabled()) | 341 if (frame->isViewSourceModeEnabled()) |
| 360 return; | 342 return; |
| 361 | 343 |
| 362 // Don't capture text of the error pages. | 344 // Don't capture text of the error pages. |
| 363 WebDataSource* ds = frame->dataSource(); | 345 WebDataSource* ds = frame->dataSource(); |
| 364 if (ds && ds->hasUnreachableURL()) | 346 if (ds && ds->hasUnreachableURL()) |
| 365 return; | 347 return; |
| 366 | 348 |
| 367 // Don't index/capture pages that are being prerendered. | 349 // Don't index/capture pages that are being prerendered. |
| 368 if (prerender::PrerenderHelper::IsPrerendering(render_frame())) | 350 if (prerender::PrerenderHelper::IsPrerendering(render_frame())) |
| 369 return; | 351 return; |
| 370 | 352 |
| 371 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); | 353 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); |
| 372 | 354 |
| 373 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the | 355 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the |
| 374 // translate helper for language detection and possible translation. | 356 // translate helper for language detection and possible translation. |
| 375 base::string16 contents = frame->contentAsText(kMaxIndexChars); | 357 base::string16 contents = frame->contentAsText(kMaxIndexChars); |
| 376 | 358 |
| 377 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, | 359 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, |
| 378 base::TimeTicks::Now() - capture_begin_time); | 360 base::TimeTicks::Now() - capture_begin_time); |
| 379 | 361 |
| 380 if (translate_helper_) | 362 // We should run language detection only once. Parsing finishes before |
| 363 // the page loads, so let's pick that timing. |
| 364 if (translate_helper_ && capture_type == PRELIMINARY_CAPTURE) |
| 381 translate_helper_->PageCaptured(contents); | 365 translate_helper_->PageCaptured(contents); |
| 382 | 366 |
| 383 TRACE_EVENT0("renderer", "ChromeRenderFrameObserver::CapturePageText"); | 367 TRACE_EVENT0("renderer", "ChromeRenderFrameObserver::CapturePageText"); |
| 384 | 368 |
| 385 #if defined(SAFE_BROWSING_CSD) | 369 #if defined(SAFE_BROWSING_CSD) |
| 386 // Will swap out the string. | 370 // Will swap out the string. |
| 387 if (phishing_classifier_) | 371 if (phishing_classifier_) |
| 388 phishing_classifier_->PageCaptured(&contents, | 372 phishing_classifier_->PageCaptured(&contents, |
| 389 capture_type == PRELIMINARY_CAPTURE); | 373 capture_type == PRELIMINARY_CAPTURE); |
| 390 #endif | 374 #endif |
| 391 } | 375 } |
| 392 | 376 |
| 393 void ChromeRenderFrameObserver::CapturePageTextLater( | 377 void ChromeRenderFrameObserver::DidMeaningfulLayout( |
| 394 TextCaptureType capture_type, | 378 blink::WebMeaningfulLayout layout_type) { |
| 395 base::TimeDelta delay) { | 379 switch (layout_type) { |
| 396 capture_timer_.Start(FROM_HERE, delay, | 380 case blink::WebMeaningfulLayout::FinishedParsing: |
| 397 base::Bind(&ChromeRenderFrameObserver::CapturePageText, | 381 CapturePageText(PRELIMINARY_CAPTURE); |
| 398 base::Unretained(this), capture_type)); | 382 break; |
| 383 case blink::WebMeaningfulLayout::FinishedLoading: |
| 384 CapturePageText(FINAL_CAPTURE); |
| 385 break; |
| 386 default: |
| 387 break; |
| 388 } |
| 399 } | 389 } |
| OLD | NEW |