| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/chrome_render_view_observer.h" | 5 #include "chrome/renderer/chrome_render_view_observer.h" |
| 6 | 6 |
| 7 #include "base/bind.h" | 7 #include "base/bind.h" |
| 8 #include "base/bind_helpers.h" | 8 #include "base/bind_helpers.h" |
| 9 #include "base/command_line.h" | 9 #include "base/command_line.h" |
| 10 #include "base/debug/crash_logging.h" | 10 #include "base/debug/crash_logging.h" |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 73 using blink::WebSecurityOrigin; | 73 using blink::WebSecurityOrigin; |
| 74 using blink::WebSize; | 74 using blink::WebSize; |
| 75 using blink::WebString; | 75 using blink::WebString; |
| 76 using blink::WebTouchEvent; | 76 using blink::WebTouchEvent; |
| 77 using blink::WebURL; | 77 using blink::WebURL; |
| 78 using blink::WebURLRequest; | 78 using blink::WebURLRequest; |
| 79 using blink::WebView; | 79 using blink::WebView; |
| 80 using blink::WebVector; | 80 using blink::WebVector; |
| 81 using blink::WebWindowFeatures; | 81 using blink::WebWindowFeatures; |
| 82 | 82 |
| 83 // Delay in milliseconds that we'll wait before capturing the page contents | 83 // Delay in milliseconds that we'll wait before capturing the page contents. |
| 84 // and thumbnail. | |
| 85 static const int kDelayForCaptureMs = 500; | 84 static const int kDelayForCaptureMs = 500; |
| 86 | 85 |
| 87 // Typically, we capture the page data once the page is loaded. | |
| 88 // Sometimes, the page never finishes to load, preventing the page capture | |
| 89 // To workaround this problem, we always perform a capture after the following | |
| 90 // delay. | |
| 91 static const int kDelayForForcedCaptureMs = 6000; | |
| 92 | |
| 93 // Define this to write the time taken by thumbnail/DOM text retrieval | 86 // Define this to write the time taken by thumbnail/DOM text retrieval |
| 94 // into the system debug log. | 87 // into the system debug log. |
| 95 // #define TIME_TEXT_RETRIEVAL | 88 // #define TIME_TEXT_RETRIEVAL |
| 96 | 89 |
| 97 // Maximum number of characters in the document to index; any text beyond | 90 // Maximum number of characters in the document to index; any text beyond |
| 98 // this point will be clipped. | 91 // this point will be clipped. |
| 99 static const size_t kMaxIndexChars = 65535; | 92 static const size_t kMaxIndexChars = 65535; |
| 100 | 93 |
| 101 // Constants for UMA statistic collection. | 94 // Constants for UMA statistic collection. |
| 102 static const char kTranslateCaptureText[] = "Translate.CaptureText"; | 95 static const char kTranslateCaptureText[] = "Translate.CaptureText"; |
| (...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 265 if (main_frame->isWebRemoteFrame()) | 258 if (main_frame->isWebRemoteFrame()) |
| 266 return; | 259 return; |
| 267 | 260 |
| 268 GURL osdd_url = main_frame->document().openSearchDescriptionURL(); | 261 GURL osdd_url = main_frame->document().openSearchDescriptionURL(); |
| 269 if (!osdd_url.is_empty()) { | 262 if (!osdd_url.is_empty()) { |
| 270 Send(new ChromeViewHostMsg_PageHasOSDD( | 263 Send(new ChromeViewHostMsg_PageHasOSDD( |
| 271 routing_id(), main_frame->document().url(), osdd_url, | 264 routing_id(), main_frame->document().url(), osdd_url, |
| 272 search_provider::AUTODETECTED_PROVIDER)); | 265 search_provider::AUTODETECTED_PROVIDER)); |
| 273 } | 266 } |
| 274 | 267 |
| 275 // Don't capture pages including refresh meta tag. | |
| 276 if (HasRefreshMetaTag(main_frame)) | |
| 277 return; | |
| 278 | |
| 279 CapturePageInfoLater( | 268 CapturePageInfoLater( |
| 280 false, // preliminary_capture | 269 false, // preliminary_capture |
| 281 base::TimeDelta::FromMilliseconds( | 270 base::TimeDelta::FromMilliseconds( |
| 282 render_view()->GetContentStateImmediately() ? | 271 render_view()->GetContentStateImmediately() ? |
| 283 0 : kDelayForCaptureMs)); | 272 0 : kDelayForCaptureMs)); |
| 284 } | 273 } |
| 285 | 274 |
| 286 void ChromeRenderViewObserver::DidCommitProvisionalLoad( | 275 void ChromeRenderViewObserver::DidCommitProvisionalLoad( |
| 287 WebLocalFrame* frame, bool is_new_navigation) { | 276 WebLocalFrame* frame, bool is_new_navigation) { |
| 288 // Don't capture pages being not new, or including refresh meta tag. | 277 // Only record the view-count crash key for new navigations. |
| 289 if (!is_new_navigation || HasRefreshMetaTag(frame)) | 278 if (!is_new_navigation) |
| 290 return; | 279 return; |
| 291 | 280 |
| 292 base::debug::SetCrashKeyValue( | 281 base::debug::SetCrashKeyValue( |
| 293 crash_keys::kViewCount, | 282 crash_keys::kViewCount, |
| 294 base::SizeTToString(content::RenderView::GetRenderViewCount())); | 283 base::SizeTToString(content::RenderView::GetRenderViewCount())); |
| 295 | |
| 296 CapturePageInfoLater( | |
| 297 true, // preliminary_capture | |
| 298 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs)); | |
| 299 } | 284 } |
| 300 | 285 |
| 301 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture, | 286 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture, |
| 302 base::TimeDelta delay) { | 287 base::TimeDelta delay) { |
| 303 capture_timer_.Start( | 288 capture_timer_.Start( |
| 304 FROM_HERE, | 289 FROM_HERE, |
| 305 delay, | 290 delay, |
| 306 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, | 291 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, |
| 307 base::Unretained(this), | 292 base::Unretained(this), |
| 308 preliminary_capture)); | 293 preliminary_capture)); |
| 309 } | 294 } |
| 310 | 295 |
| 296 void ChromeRenderViewObserver::DidFirstLayoutAfterFinishedParsing() { |
| 297 CapturePageInfo(true); |
| 298 } |
| 299 |
| 311 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) { | 300 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) { |
| 312 if (!render_view()->GetWebView()) | 301 if (!render_view()->GetWebView()) |
| 313 return; | 302 return; |
| 314 | 303 |
| 315 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); | 304 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); |
| 316 if (!main_frame) | 305 if (!main_frame) |
| 317 return; | 306 return; |
| 318 | 307 |
| 319 // TODO(creis): Refactor WebFrame::contentAsText to handle RemoteFrames, | 308 // TODO(creis): Refactor WebFrame::contentAsText to handle RemoteFrames, |
| 320 // likely by moving it to the browser process. For now, only capture page | 309 // likely by moving it to the browser process. For now, only capture page |
| 321 // info from main frames that are LocalFrames, and ignore their RemoteFrame | 310 // info from main frames that are LocalFrames, and ignore their RemoteFrame |
| 322 // children. | 311 // children. |
| 323 if (main_frame->isWebRemoteFrame()) | 312 if (main_frame->isWebRemoteFrame()) |
| 324 return; | 313 return; |
| 325 | 314 |
| 315 WebLocalFrame* local_frame = main_frame->toWebLocalFrame(); |
| 316 if (local_frame->isNavigationScheduled()) |
| 317 return; |
| 318 |
| 326 // Don't index/capture pages that are in view source mode. | 319 // Don't index/capture pages that are in view source mode. |
| 327 if (main_frame->isViewSourceModeEnabled()) | 320 if (main_frame->isViewSourceModeEnabled()) |
| 328 return; | 321 return; |
| 329 | 322 |
| 330 // Don't index/capture pages that failed to load. This only checks the top | 323 // Don't index/capture pages that failed to load. This only checks the top |
| 331 // level frame so the thumbnail may contain a frame that failed to load. | 324 // level frame so the thumbnail may contain a frame that failed to load. |
| 332 WebDataSource* ds = main_frame->dataSource(); | 325 WebDataSource* ds = main_frame->dataSource(); |
| 333 if (ds && ds->hasUnreachableURL()) | 326 if (ds && ds->hasUnreachableURL()) |
| 334 return; | 327 return; |
| 335 | 328 |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 381 | 374 |
| 382 // When the contents are clipped to the maximum, we don't want to have a | 375 // When the contents are clipped to the maximum, we don't want to have a |
| 383 // partial word indexed at the end that might have been clipped. Therefore, | 376 // partial word indexed at the end that might have been clipped. Therefore, |
| 384 // terminate the string at the last space to ensure no words are clipped. | 377 // terminate the string at the last space to ensure no words are clipped. |
| 385 if (contents->size() == kMaxIndexChars) { | 378 if (contents->size() == kMaxIndexChars) { |
| 386 size_t last_space_index = contents->find_last_of(base::kWhitespaceUTF16); | 379 size_t last_space_index = contents->find_last_of(base::kWhitespaceUTF16); |
| 387 if (last_space_index != base::string16::npos) | 380 if (last_space_index != base::string16::npos) |
| 388 contents->resize(last_space_index); | 381 contents->resize(last_space_index); |
| 389 } | 382 } |
| 390 } | 383 } |
| 391 | |
| 392 bool ChromeRenderViewObserver::HasRefreshMetaTag(WebFrame* frame) { | |
| 393 if (!frame) | |
| 394 return false; | |
| 395 WebElement head = frame->document().head(); | |
| 396 if (head.isNull() || !head.hasChildNodes()) | |
| 397 return false; | |
| 398 | |
| 399 const WebString tag_name(base::ASCIIToUTF16("meta")); | |
| 400 const WebString attribute_name(base::ASCIIToUTF16("http-equiv")); | |
| 401 | |
| 402 WebNodeList children = head.childNodes(); | |
| 403 for (size_t i = 0; i < children.length(); ++i) { | |
| 404 WebNode node = children.item(i); | |
| 405 if (!node.isElementNode()) | |
| 406 continue; | |
| 407 WebElement element = node.to<WebElement>(); | |
| 408 if (!element.hasHTMLTagName(tag_name)) | |
| 409 continue; | |
| 410 WebString value = element.getAttribute(attribute_name); | |
| 411 if (value.isNull() || | |
| 412 !base::LowerCaseEqualsASCII(base::StringPiece16(value), "refresh")) | |
| 413 continue; | |
| 414 return true; | |
| 415 } | |
| 416 return false; | |
| 417 } | |
| OLD | NEW |