OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/chrome_render_frame_observer.h" | 5 #include "chrome/renderer/chrome_render_frame_observer.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 | 8 |
9 #include <limits> | 9 #include <limits> |
10 #include <string> | 10 #include <string> |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
46 #endif | 46 #endif |
47 | 47 |
48 using blink::WebDataSource; | 48 using blink::WebDataSource; |
49 using blink::WebElement; | 49 using blink::WebElement; |
50 using blink::WebLocalFrame; | 50 using blink::WebLocalFrame; |
51 using blink::WebNode; | 51 using blink::WebNode; |
52 using blink::WebString; | 52 using blink::WebString; |
53 using content::SSLStatus; | 53 using content::SSLStatus; |
54 using content::RenderFrame; | 54 using content::RenderFrame; |
55 | 55 |
| 56 // Delay in milliseconds that we'll wait before capturing the page contents. |
| 57 static const int kDelayForCaptureMs = 500; |
| 58 |
| 59 // Typically, we capture the page data once the page is loaded. |
| 60 // Sometimes, the page never finishes loading, preventing the page capture. |
| 61 // To work around this problem, we always perform a capture after the following |
| 62 // delay. |
| 63 static const int kDelayForForcedCaptureMs = 6000; |
| 64 |
56 // Maximum number of characters in the document to index. | 65 // Maximum number of characters in the document to index. |
57 // Any text beyond this point will be clipped. | 66 // Any text beyond this point will be clipped. |
58 static const size_t kMaxIndexChars = 65535; | 67 static const size_t kMaxIndexChars = 65535; |
59 | 68 |
60 // Constants for UMA statistic collection. | 69 // Constants for UMA statistic collection. |
61 static const char kTranslateCaptureText[] = "Translate.CaptureText"; | 70 static const char kTranslateCaptureText[] = "Translate.CaptureText"; |
62 | 71 |
63 namespace { | 72 namespace { |
64 | 73 |
65 // If the source image is null or occupies less area than | 74 // If the source image is null or occupies less area than |
(...skipping 30 matching lines...) Expand all Loading... |
96 skia::ImageOperations::RESIZE_GOOD, | 105 skia::ImageOperations::RESIZE_GOOD, |
97 static_cast<int>(scaled_size.width()), | 106 static_cast<int>(scaled_size.width()), |
98 static_cast<int>(scaled_size.height())); | 107 static_cast<int>(scaled_size.height())); |
99 } | 108 } |
100 | 109 |
101 } // namespace | 110 } // namespace |
102 | 111 |
103 ChromeRenderFrameObserver::ChromeRenderFrameObserver( | 112 ChromeRenderFrameObserver::ChromeRenderFrameObserver( |
104 content::RenderFrame* render_frame) | 113 content::RenderFrame* render_frame) |
105 : content::RenderFrameObserver(render_frame), | 114 : content::RenderFrameObserver(render_frame), |
| 115 capture_timer_(false, false), |
106 translate_helper_(nullptr), | 116 translate_helper_(nullptr), |
107 phishing_classifier_(nullptr) { | 117 phishing_classifier_(nullptr) { |
108 // Don't do anything for subframes. | 118 // Don't do anything for subframes. |
109 if (!render_frame->IsMainFrame()) | 119 if (!render_frame->IsMainFrame()) |
110 return; | 120 return; |
111 | 121 |
112 const base::CommandLine& command_line = | 122 const base::CommandLine& command_line = |
113 *base::CommandLine::ForCurrentProcess(); | 123 *base::CommandLine::ForCurrentProcess(); |
114 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) | 124 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) |
115 OnSetClientSidePhishingDetection(true); | 125 OnSetClientSidePhishingDetection(true); |
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
290 if (frame->parent()) | 300 if (frame->parent()) |
291 return; | 301 return; |
292 | 302 |
293 GURL osdd_url = frame->document().openSearchDescriptionURL(); | 303 GURL osdd_url = frame->document().openSearchDescriptionURL(); |
294 if (!osdd_url.is_empty()) { | 304 if (!osdd_url.is_empty()) { |
295 Send(new ChromeViewHostMsg_PageHasOSDD( | 305 Send(new ChromeViewHostMsg_PageHasOSDD( |
296 routing_id(), frame->document().url(), osdd_url, | 306 routing_id(), frame->document().url(), osdd_url, |
297 search_provider::AUTODETECTED_PROVIDER)); | 307 search_provider::AUTODETECTED_PROVIDER)); |
298 } | 308 } |
299 | 309 |
300 // TODO(dglazkov): This is only necessary for ChromeRenderViewTests, | 310 // Don't capture pages that have pending redirect or location change. |
301 // since they don't actually pump frames. These tests will need | 311 if (frame->isNavigationScheduled()) |
302 // to be rewritten eventually (there is no ChromeRenderView anymore). | 312 return; |
303 if (render_frame()->GetRenderView()->GetContentStateImmediately()) { | 313 |
304 CapturePageText(PRELIMINARY_CAPTURE); | 314 CapturePageTextLater( |
305 } | 315 FINAL_CAPTURE, |
| 316 base::TimeDelta::FromMilliseconds( |
| 317 render_frame()->GetRenderView()->GetContentStateImmediately() |
| 318 ? 0 |
| 319 : kDelayForCaptureMs)); |
306 } | 320 } |
307 | 321 |
308 void ChromeRenderFrameObserver::DidStartProvisionalLoad() { | 322 void ChromeRenderFrameObserver::DidStartProvisionalLoad() { |
309 // Let translate_helper do any preparatory work for loading a URL. | 323 // Let translate_helper do any preparatory work for loading a URL. |
310 if (!translate_helper_) | 324 if (!translate_helper_) |
311 return; | 325 return; |
312 | 326 |
313 translate_helper_->PrepareForUrl( | 327 translate_helper_->PrepareForUrl( |
314 render_frame()->GetWebFrame()->document().url()); | 328 render_frame()->GetWebFrame()->document().url()); |
315 } | 329 } |
316 | 330 |
317 void ChromeRenderFrameObserver::DidCommitProvisionalLoad( | 331 void ChromeRenderFrameObserver::DidCommitProvisionalLoad( |
318 bool is_new_navigation, | 332 bool is_new_navigation, |
319 bool is_same_page_navigation) { | 333 bool is_same_page_navigation) { |
320 WebLocalFrame* frame = render_frame()->GetWebFrame(); | 334 WebLocalFrame* frame = render_frame()->GetWebFrame(); |
321 | 335 |
322 // Don't do anything for subframes. | 336 // Don't do anything for subframes. |
323 if (frame->parent()) | 337 if (frame->parent()) |
324 return; | 338 return; |
325 | 339 |
| 340 // Don't capture pages that are not new navigations, or that have a pending |
| 341 // redirect or location change. |
| 342 if (!is_new_navigation || frame->isNavigationScheduled()) |
| 343 return; |
| 344 |
326 base::debug::SetCrashKeyValue( | 345 base::debug::SetCrashKeyValue( |
327 crash_keys::kViewCount, | 346 crash_keys::kViewCount, |
328 base::SizeTToString(content::RenderView::GetRenderViewCount())); | 347 base::SizeTToString(content::RenderView::GetRenderViewCount())); |
| 348 |
| 349 CapturePageTextLater(PRELIMINARY_CAPTURE, base::TimeDelta::FromMilliseconds( |
| 350 kDelayForForcedCaptureMs)); |
329 } | 351 } |
330 | 352 |
331 void ChromeRenderFrameObserver::CapturePageText(TextCaptureType capture_type) { | 353 void ChromeRenderFrameObserver::CapturePageText(TextCaptureType capture_type) { |
332 WebLocalFrame* frame = render_frame()->GetWebFrame(); | 354 WebLocalFrame* frame = render_frame()->GetWebFrame(); |
333 if (!frame) | 355 if (!frame) |
334 return; | 356 return; |
335 | 357 |
336 // Don't capture pages that have pending redirect or location change. | |
337 if (frame->isNavigationScheduled()) | |
338 return; | |
339 | |
340 // Don't index/capture pages that are in view source mode. | 358 // Don't index/capture pages that are in view source mode. |
341 if (frame->isViewSourceModeEnabled()) | 359 if (frame->isViewSourceModeEnabled()) |
342 return; | 360 return; |
343 | 361 |
344 // Don't capture text of the error pages. | 362 // Don't capture text of the error pages. |
345 WebDataSource* ds = frame->dataSource(); | 363 WebDataSource* ds = frame->dataSource(); |
346 if (ds && ds->hasUnreachableURL()) | 364 if (ds && ds->hasUnreachableURL()) |
347 return; | 365 return; |
348 | 366 |
349 // Don't index/capture pages that are being prerendered. | 367 // Don't index/capture pages that are being prerendered. |
350 if (prerender::PrerenderHelper::IsPrerendering(render_frame())) | 368 if (prerender::PrerenderHelper::IsPrerendering(render_frame())) |
351 return; | 369 return; |
352 | 370 |
353 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); | 371 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); |
354 | 372 |
355 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the | 373 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the |
356 // translate helper for language detection and possible translation. | 374 // translate helper for language detection and possible translation. |
357 base::string16 contents = frame->contentAsText(kMaxIndexChars); | 375 base::string16 contents = frame->contentAsText(kMaxIndexChars); |
358 | 376 |
359 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, | 377 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, |
360 base::TimeTicks::Now() - capture_begin_time); | 378 base::TimeTicks::Now() - capture_begin_time); |
361 | 379 |
362 // We should run language detection only once. Parsing finishes before | 380 if (translate_helper_) |
363 // the page loads, so let's pick that timing. | |
364 if (translate_helper_ && capture_type == PRELIMINARY_CAPTURE) | |
365 translate_helper_->PageCaptured(contents); | 381 translate_helper_->PageCaptured(contents); |
366 | 382 |
367 TRACE_EVENT0("renderer", "ChromeRenderFrameObserver::CapturePageText"); | 383 TRACE_EVENT0("renderer", "ChromeRenderFrameObserver::CapturePageText"); |
368 | 384 |
369 #if defined(SAFE_BROWSING_CSD) | 385 #if defined(SAFE_BROWSING_CSD) |
370 // Will swap out the string. | 386 // Will swap out the string. |
371 if (phishing_classifier_) | 387 if (phishing_classifier_) |
372 phishing_classifier_->PageCaptured(&contents, | 388 phishing_classifier_->PageCaptured(&contents, |
373 capture_type == PRELIMINARY_CAPTURE); | 389 capture_type == PRELIMINARY_CAPTURE); |
374 #endif | 390 #endif |
375 } | 391 } |
376 | 392 |
377 void ChromeRenderFrameObserver::DidMeaningfulLayout( | 393 void ChromeRenderFrameObserver::CapturePageTextLater( |
378 blink::WebMeaningfulLayout layout_type) { | 394 TextCaptureType capture_type, |
379 switch (layout_type) { | 395 base::TimeDelta delay) { |
380 case blink::WebMeaningfulLayout::FinishedParsing: | 396 capture_timer_.Start(FROM_HERE, delay, |
381 CapturePageText(PRELIMINARY_CAPTURE); | 397 base::Bind(&ChromeRenderFrameObserver::CapturePageText, |
382 break; | 398 base::Unretained(this), capture_type)); |
383 case blink::WebMeaningfulLayout::FinishedLoading: | |
384 CapturePageText(FINAL_CAPTURE); | |
385 break; | |
386 default: | |
387 break; | |
388 } | |
389 } | 399 } |
OLD | NEW |