OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/chrome_render_frame_observer.h" | 5 #include "chrome/renderer/chrome_render_frame_observer.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 | 8 |
9 #include <limits> | 9 #include <limits> |
10 #include <string> | 10 #include <string> |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
46 #endif | 46 #endif |
47 | 47 |
48 using blink::WebDataSource; | 48 using blink::WebDataSource; |
49 using blink::WebElement; | 49 using blink::WebElement; |
50 using blink::WebLocalFrame; | 50 using blink::WebLocalFrame; |
51 using blink::WebNode; | 51 using blink::WebNode; |
52 using blink::WebString; | 52 using blink::WebString; |
53 using content::SSLStatus; | 53 using content::SSLStatus; |
54 using content::RenderFrame; | 54 using content::RenderFrame; |
55 | 55 |
56 // Delay in milliseconds that we'll wait before capturing the page contents. | |
57 static const int kDelayForCaptureMs = 500; | |
58 | |
59 // Typically, we capture the page data once the page is loaded. | |
60 // Sometimes the page never finishes loading, which prevents the page capture. | |
61 // To work around this problem, we always perform a capture after the following | |
62 // delay. | |
63 static const int kDelayForForcedCaptureMs = 6000; | |
64 | |
65 // Maximum number of characters in the document to index. | 56 // Maximum number of characters in the document to index. |
66 // Any text beyond this point will be clipped. | 57 // Any text beyond this point will be clipped. |
67 static const size_t kMaxIndexChars = 65535; | 58 static const size_t kMaxIndexChars = 65535; |
68 | 59 |
69 // Constants for UMA statistic collection. | 60 // Constants for UMA statistic collection. |
70 static const char kTranslateCaptureText[] = "Translate.CaptureText"; | 61 static const char kTranslateCaptureText[] = "Translate.CaptureText"; |
71 | 62 |
72 namespace { | 63 namespace { |
73 | 64 |
74 // If the source image is null or occupies less area than | 65 // If the source image is null or occupies less area than |
(...skipping 30 matching lines...) Expand all Loading... |
105 skia::ImageOperations::RESIZE_GOOD, | 96 skia::ImageOperations::RESIZE_GOOD, |
106 static_cast<int>(scaled_size.width()), | 97 static_cast<int>(scaled_size.width()), |
107 static_cast<int>(scaled_size.height())); | 98 static_cast<int>(scaled_size.height())); |
108 } | 99 } |
109 | 100 |
110 } // namespace | 101 } // namespace |
111 | 102 |
112 ChromeRenderFrameObserver::ChromeRenderFrameObserver( | 103 ChromeRenderFrameObserver::ChromeRenderFrameObserver( |
113 content::RenderFrame* render_frame) | 104 content::RenderFrame* render_frame) |
114 : content::RenderFrameObserver(render_frame), | 105 : content::RenderFrameObserver(render_frame), |
115 capture_timer_(false, false), | |
116 translate_helper_(nullptr), | 106 translate_helper_(nullptr), |
117 phishing_classifier_(nullptr) { | 107 phishing_classifier_(nullptr) { |
118 // Don't do anything for subframes. | 108 // Don't do anything for subframes. |
119 if (!render_frame->IsMainFrame()) | 109 if (!render_frame->IsMainFrame()) |
120 return; | 110 return; |
121 | 111 |
122 const base::CommandLine& command_line = | 112 const base::CommandLine& command_line = |
123 *base::CommandLine::ForCurrentProcess(); | 113 *base::CommandLine::ForCurrentProcess(); |
124 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) | 114 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) |
125 OnSetClientSidePhishingDetection(true); | 115 OnSetClientSidePhishingDetection(true); |
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
300 if (frame->parent()) | 290 if (frame->parent()) |
301 return; | 291 return; |
302 | 292 |
303 GURL osdd_url = frame->document().openSearchDescriptionURL(); | 293 GURL osdd_url = frame->document().openSearchDescriptionURL(); |
304 if (!osdd_url.is_empty()) { | 294 if (!osdd_url.is_empty()) { |
305 Send(new ChromeViewHostMsg_PageHasOSDD( | 295 Send(new ChromeViewHostMsg_PageHasOSDD( |
306 routing_id(), frame->document().url(), osdd_url, | 296 routing_id(), frame->document().url(), osdd_url, |
307 search_provider::AUTODETECTED_PROVIDER)); | 297 search_provider::AUTODETECTED_PROVIDER)); |
308 } | 298 } |
309 | 299 |
310 // Don't capture pages that have pending redirect or location change. | 300 // TODO(dglazkov): This is only necessary for ChromeRenderViewTests, |
311 if (frame->isNavigationScheduled()) | 301 // since they don't actually pump frames. These tests will need |
312 return; | 302 // to be rewritten eventually (there is no ChromeRenderView anymore). |
313 | 303 if (render_frame()->GetRenderView()->GetContentStateImmediately()) { |
314 CapturePageTextLater( | 304 CapturePageText(PRELIMINARY_CAPTURE); |
315 FINAL_CAPTURE, | 305 } |
316 base::TimeDelta::FromMilliseconds( | |
317 render_frame()->GetRenderView()->GetContentStateImmediately() | |
318 ? 0 | |
319 : kDelayForCaptureMs)); | |
320 } | 306 } |
321 | 307 |
322 void ChromeRenderFrameObserver::DidStartProvisionalLoad() { | 308 void ChromeRenderFrameObserver::DidStartProvisionalLoad() { |
323 // Let translate_helper do any preparatory work for loading a URL. | 309 // Let translate_helper do any preparatory work for loading a URL. |
324 if (!translate_helper_) | 310 if (!translate_helper_) |
325 return; | 311 return; |
326 | 312 |
327 translate_helper_->PrepareForUrl( | 313 translate_helper_->PrepareForUrl( |
328 render_frame()->GetWebFrame()->document().url()); | 314 render_frame()->GetWebFrame()->document().url()); |
329 } | 315 } |
330 | 316 |
331 void ChromeRenderFrameObserver::DidCommitProvisionalLoad( | 317 void ChromeRenderFrameObserver::DidCommitProvisionalLoad( |
332 bool is_new_navigation, | 318 bool is_new_navigation, |
333 bool is_same_page_navigation) { | 319 bool is_same_page_navigation) { |
334 WebLocalFrame* frame = render_frame()->GetWebFrame(); | 320 WebLocalFrame* frame = render_frame()->GetWebFrame(); |
335 | 321 |
336 // Don't do anything for subframes. | 322 // Don't do anything for subframes. |
337 if (frame->parent()) | 323 if (frame->parent()) |
338 return; | 324 return; |
339 | 325 |
340 // Don't capture pages that are not new navigations or that have a pending | |
341 // redirect or location change. | |
342 if (!is_new_navigation || frame->isNavigationScheduled()) | |
343 return; | |
344 | |
345 base::debug::SetCrashKeyValue( | 326 base::debug::SetCrashKeyValue( |
346 crash_keys::kViewCount, | 327 crash_keys::kViewCount, |
347 base::SizeTToString(content::RenderView::GetRenderViewCount())); | 328 base::SizeTToString(content::RenderView::GetRenderViewCount())); |
348 | |
349 CapturePageTextLater(PRELIMINARY_CAPTURE, base::TimeDelta::FromMilliseconds( | |
350 kDelayForForcedCaptureMs)); | |
351 } | 329 } |
352 | 330 |
353 void ChromeRenderFrameObserver::CapturePageText(TextCaptureType capture_type) { | 331 void ChromeRenderFrameObserver::CapturePageText(TextCaptureType capture_type) { |
354 WebLocalFrame* frame = render_frame()->GetWebFrame(); | 332 WebLocalFrame* frame = render_frame()->GetWebFrame(); |
355 if (!frame) | 333 if (!frame) |
356 return; | 334 return; |
357 | 335 |
| 336 // Don't capture pages that have pending redirect or location change. |
| 337 if (frame->isNavigationScheduled()) |
| 338 return; |
| 339 |
358 // Don't index/capture pages that are in view source mode. | 340 // Don't index/capture pages that are in view source mode. |
359 if (frame->isViewSourceModeEnabled()) | 341 if (frame->isViewSourceModeEnabled()) |
360 return; | 342 return; |
361 | 343 |
362 // Don't capture text of the error pages. | 344 // Don't capture text of the error pages. |
363 WebDataSource* ds = frame->dataSource(); | 345 WebDataSource* ds = frame->dataSource(); |
364 if (ds && ds->hasUnreachableURL()) | 346 if (ds && ds->hasUnreachableURL()) |
365 return; | 347 return; |
366 | 348 |
367 // Don't index/capture pages that are being prerendered. | 349 // Don't index/capture pages that are being prerendered. |
368 if (prerender::PrerenderHelper::IsPrerendering(render_frame())) | 350 if (prerender::PrerenderHelper::IsPrerendering(render_frame())) |
369 return; | 351 return; |
370 | 352 |
371 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); | 353 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); |
372 | 354 |
373 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the | 355 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the |
374 // translate helper for language detection and possible translation. | 356 // translate helper for language detection and possible translation. |
375 base::string16 contents = frame->contentAsText(kMaxIndexChars); | 357 base::string16 contents = frame->contentAsText(kMaxIndexChars); |
376 | 358 |
377 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, | 359 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, |
378 base::TimeTicks::Now() - capture_begin_time); | 360 base::TimeTicks::Now() - capture_begin_time); |
379 | 361 |
380 if (translate_helper_) | 362 // We should run language detection only once. Parsing finishes before |
| 363 // the page loads, so let's pick that timing. |
| 364 if (translate_helper_ && capture_type == PRELIMINARY_CAPTURE) |
381 translate_helper_->PageCaptured(contents); | 365 translate_helper_->PageCaptured(contents); |
382 | 366 |
383 TRACE_EVENT0("renderer", "ChromeRenderFrameObserver::CapturePageText"); | 367 TRACE_EVENT0("renderer", "ChromeRenderFrameObserver::CapturePageText"); |
384 | 368 |
385 #if defined(SAFE_BROWSING_CSD) | 369 #if defined(SAFE_BROWSING_CSD) |
386 // Will swap out the string. | 370 // Will swap out the string. |
387 if (phishing_classifier_) | 371 if (phishing_classifier_) |
388 phishing_classifier_->PageCaptured(&contents, | 372 phishing_classifier_->PageCaptured(&contents, |
389 capture_type == PRELIMINARY_CAPTURE); | 373 capture_type == PRELIMINARY_CAPTURE); |
390 #endif | 374 #endif |
391 } | 375 } |
392 | 376 |
393 void ChromeRenderFrameObserver::CapturePageTextLater( | 377 void ChromeRenderFrameObserver::DidMeaningfulLayout( |
394 TextCaptureType capture_type, | 378 blink::WebMeaningfulLayout layout_type) { |
395 base::TimeDelta delay) { | 379 switch (layout_type) { |
396 capture_timer_.Start(FROM_HERE, delay, | 380 case blink::WebMeaningfulLayout::FinishedParsing: |
397 base::Bind(&ChromeRenderFrameObserver::CapturePageText, | 381 CapturePageText(PRELIMINARY_CAPTURE); |
398 base::Unretained(this), capture_type)); | 382 break; |
| 383 case blink::WebMeaningfulLayout::FinishedLoading: |
| 384 CapturePageText(FINAL_CAPTURE); |
| 385 break; |
| 386 default: |
| 387 break; |
| 388 } |
399 } | 389 } |
OLD | NEW |