OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/chrome_render_view_observer.h" | 5 #include "chrome/renderer/chrome_render_view_observer.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/bind_helpers.h" | 8 #include "base/bind_helpers.h" |
9 #include "base/command_line.h" | 9 #include "base/command_line.h" |
10 #include "base/debug/crash_logging.h" | 10 #include "base/debug/crash_logging.h" |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
73 using blink::WebSecurityOrigin; | 73 using blink::WebSecurityOrigin; |
74 using blink::WebSize; | 74 using blink::WebSize; |
75 using blink::WebString; | 75 using blink::WebString; |
76 using blink::WebTouchEvent; | 76 using blink::WebTouchEvent; |
77 using blink::WebURL; | 77 using blink::WebURL; |
78 using blink::WebURLRequest; | 78 using blink::WebURLRequest; |
79 using blink::WebView; | 79 using blink::WebView; |
80 using blink::WebVector; | 80 using blink::WebVector; |
81 using blink::WebWindowFeatures; | 81 using blink::WebWindowFeatures; |
82 | 82 |
83 // Delay in milliseconds that we'll wait before capturing the page contents | 83 // Delay in milliseconds that we'll wait before capturing the page contents. |
84 // and thumbnail. | |
85 static const int kDelayForCaptureMs = 500; | 84 static const int kDelayForCaptureMs = 500; |
86 | 85 |
87 // Typically, we capture the page data once the page is loaded. | |
88 // Sometimes, the page never finishes loading, preventing the page capture. | |
89 // To work around this problem, we always perform a capture after the following | |
90 // delay. | |
91 static const int kDelayForForcedCaptureMs = 6000; | |
92 | |
93 // define to write the time necessary for thumbnail/DOM text retrieval, | 86 // define to write the time necessary for thumbnail/DOM text retrieval, |
94 // respectively, into the system debug log | 87 // respectively, into the system debug log |
95 // #define TIME_TEXT_RETRIEVAL | 88 // #define TIME_TEXT_RETRIEVAL |
96 | 89 |
97 // maximum number of characters in the document to index, any text beyond this | 90 // maximum number of characters in the document to index, any text beyond this |
98 // point will be clipped | 91 // point will be clipped |
99 static const size_t kMaxIndexChars = 65535; | 92 static const size_t kMaxIndexChars = 65535; |
100 | 93 |
101 // Constants for UMA statistic collection. | 94 // Constants for UMA statistic collection. |
102 static const char kTranslateCaptureText[] = "Translate.CaptureText"; | 95 static const char kTranslateCaptureText[] = "Translate.CaptureText"; |
(...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
265 if (main_frame->isWebRemoteFrame()) | 258 if (main_frame->isWebRemoteFrame()) |
266 return; | 259 return; |
267 | 260 |
268 GURL osdd_url = main_frame->document().openSearchDescriptionURL(); | 261 GURL osdd_url = main_frame->document().openSearchDescriptionURL(); |
269 if (!osdd_url.is_empty()) { | 262 if (!osdd_url.is_empty()) { |
270 Send(new ChromeViewHostMsg_PageHasOSDD( | 263 Send(new ChromeViewHostMsg_PageHasOSDD( |
271 routing_id(), main_frame->document().url(), osdd_url, | 264 routing_id(), main_frame->document().url(), osdd_url, |
272 search_provider::AUTODETECTED_PROVIDER)); | 265 search_provider::AUTODETECTED_PROVIDER)); |
273 } | 266 } |
274 | 267 |
275 // Don't capture pages including refresh meta tag. | |
276 if (HasRefreshMetaTag(main_frame)) | |
277 return; | |
278 | |
279 CapturePageInfoLater( | 268 CapturePageInfoLater( |
280 false, // preliminary_capture | 269 false, // preliminary_capture |
281 base::TimeDelta::FromMilliseconds( | 270 base::TimeDelta::FromMilliseconds( |
282 render_view()->GetContentStateImmediately() ? | 271 render_view()->GetContentStateImmediately() ? |
283 0 : kDelayForCaptureMs)); | 272 0 : kDelayForCaptureMs)); |
284 } | 273 } |
285 | 274 |
286 void ChromeRenderViewObserver::DidCommitProvisionalLoad( | 275 void ChromeRenderViewObserver::DidCommitProvisionalLoad( |
287 WebLocalFrame* frame, bool is_new_navigation) { | 276 WebLocalFrame* frame, bool is_new_navigation) { |
288 // Don't capture pages that are not new, or that include a refresh meta tag. | 277 // Don't capture pages that are not new navigations. |
289 if (!is_new_navigation || HasRefreshMetaTag(frame)) | 278 if (!is_new_navigation) |
290 return; | 279 return; |
291 | 280 |
292 base::debug::SetCrashKeyValue( | 281 base::debug::SetCrashKeyValue( |
293 crash_keys::kViewCount, | 282 crash_keys::kViewCount, |
294 base::SizeTToString(content::RenderView::GetRenderViewCount())); | 283 base::SizeTToString(content::RenderView::GetRenderViewCount())); |
295 | |
296 CapturePageInfoLater( | |
297 true, // preliminary_capture | |
298 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs)); | |
299 } | 284 } |
300 | 285 |
301 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture, | 286 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture, |
302 base::TimeDelta delay) { | 287 base::TimeDelta delay) { |
303 capture_timer_.Start( | 288 capture_timer_.Start( |
304 FROM_HERE, | 289 FROM_HERE, |
305 delay, | 290 delay, |
306 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, | 291 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, |
307 base::Unretained(this), | 292 base::Unretained(this), |
308 preliminary_capture)); | 293 preliminary_capture)); |
309 } | 294 } |
310 | 295 |
| 296 void ChromeRenderViewObserver::DidFirstLayoutAfterFinishedParsing() { |
| 297 CapturePageInfo(true); |
| 298 } |
| 299 |
311 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) { | 300 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) { |
312 if (!render_view()->GetWebView()) | 301 if (!render_view()->GetWebView()) |
313 return; | 302 return; |
314 | 303 |
315 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); | 304 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); |
316 if (!main_frame) | 305 if (!main_frame) |
317 return; | 306 return; |
318 | 307 |
319 // TODO(creis): Refactor WebFrame::contentAsText to handle RemoteFrames, | 308 // TODO(creis): Refactor WebFrame::contentAsText to handle RemoteFrames, |
320 // likely by moving it to the browser process. For now, only capture page | 309 // likely by moving it to the browser process. For now, only capture page |
321 // info from main frames that are LocalFrames, and ignore their RemoteFrame | 310 // info from main frames that are LocalFrames, and ignore their RemoteFrame |
322 // children. | 311 // children. |
323 if (main_frame->isWebRemoteFrame()) | 312 if (main_frame->isWebRemoteFrame()) |
324 return; | 313 return; |
325 | 314 |
| 315 WebLocalFrame* local_frame = main_frame->toWebLocalFrame(); |
| 316 if (local_frame->isNavigationScheduled()) |
| 317 return; |
| 318 |
326 // Don't index/capture pages that are in view source mode. | 319 // Don't index/capture pages that are in view source mode. |
327 if (main_frame->isViewSourceModeEnabled()) | 320 if (main_frame->isViewSourceModeEnabled()) |
328 return; | 321 return; |
329 | 322 |
330 // Don't index/capture pages that failed to load. This only checks the top | 323 // Don't index/capture pages that failed to load. This only checks the top |
331 // level frame so the thumbnail may contain a frame that failed to load. | 324 // level frame so the thumbnail may contain a frame that failed to load. |
332 WebDataSource* ds = main_frame->dataSource(); | 325 WebDataSource* ds = main_frame->dataSource(); |
333 if (ds && ds->hasUnreachableURL()) | 326 if (ds && ds->hasUnreachableURL()) |
334 return; | 327 return; |
335 | 328 |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
381 | 374 |
382 // When the contents are clipped to the maximum, we don't want to have a | 375 // When the contents are clipped to the maximum, we don't want to have a |
383 // partial word indexed at the end that might have been clipped. Therefore, | 376 // partial word indexed at the end that might have been clipped. Therefore, |
384 // terminate the string at the last space to ensure no words are clipped. | 377 // terminate the string at the last space to ensure no words are clipped. |
385 if (contents->size() == kMaxIndexChars) { | 378 if (contents->size() == kMaxIndexChars) { |
386 size_t last_space_index = contents->find_last_of(base::kWhitespaceUTF16); | 379 size_t last_space_index = contents->find_last_of(base::kWhitespaceUTF16); |
387 if (last_space_index != base::string16::npos) | 380 if (last_space_index != base::string16::npos) |
388 contents->resize(last_space_index); | 381 contents->resize(last_space_index); |
389 } | 382 } |
390 } | 383 } |
391 | |
392 bool ChromeRenderViewObserver::HasRefreshMetaTag(WebFrame* frame) { | |
393 if (!frame) | |
394 return false; | |
395 WebElement head = frame->document().head(); | |
396 if (head.isNull() || !head.hasChildNodes()) | |
397 return false; | |
398 | |
399 const WebString tag_name(base::ASCIIToUTF16("meta")); | |
400 const WebString attribute_name(base::ASCIIToUTF16("http-equiv")); | |
401 | |
402 WebNodeList children = head.childNodes(); | |
403 for (size_t i = 0; i < children.length(); ++i) { | |
404 WebNode node = children.item(i); | |
405 if (!node.isElementNode()) | |
406 continue; | |
407 WebElement element = node.to<WebElement>(); | |
408 if (!element.hasHTMLTagName(tag_name)) | |
409 continue; | |
410 WebString value = element.getAttribute(attribute_name); | |
411 if (value.isNull() || | |
412 !base::LowerCaseEqualsASCII(base::StringPiece16(value), "refresh")) | |
413 continue; | |
414 return true; | |
415 } | |
416 return false; | |
417 } | |
OLD | NEW |