Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(710)

Side by Side Diff: chrome/renderer/chrome_render_view_observer.cc

Issue 363293005: Remove page id from "page capture". (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: fixes Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/chrome_render_view_observer.h" 5 #include "chrome/renderer/chrome_render_view_observer.h"
6 6
7 #include "base/bind.h" 7 #include "base/bind.h"
8 #include "base/bind_helpers.h" 8 #include "base/bind_helpers.h"
9 #include "base/command_line.h" 9 #include "base/command_line.h"
10 #include "base/debug/trace_event.h" 10 #include "base/debug/trace_event.h"
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 89
90 // Maximum number of characters in the document to index; any text beyond this 90 // Maximum number of characters in the document to index; any text beyond this
91 // point will be clipped. 91 // point will be clipped.
92 static const size_t kMaxIndexChars = 65535; 92 static const size_t kMaxIndexChars = 65535;
93 93
94 // Constants for UMA statistic collection. 94 // Constants for UMA statistic collection.
95 static const char kTranslateCaptureText[] = "Translate.CaptureText"; 95 static const char kTranslateCaptureText[] = "Translate.CaptureText";
96 96
97 namespace { 97 namespace {
98 98
99 GURL StripRef(const GURL& url) {
100 GURL::Replacements replacements;
101 replacements.ClearRef();
102 return url.ReplaceComponents(replacements);
103 }
104
105 #if defined(OS_ANDROID) 99 #if defined(OS_ANDROID)
106 // Parses the DOM for a <meta> tag with a particular name. 100 // Parses the DOM for a <meta> tag with a particular name.
107 // |meta_tag_content| is set to the contents of the 'content' attribute. 101 // |meta_tag_content| is set to the contents of the 'content' attribute.
108 // |found_tag| is set to true if the tag was successfully found. 102 // |found_tag| is set to true if the tag was successfully found.
109 // Returns true if the document was parsed without errors. 103 // Returns true if the document was parsed without errors.
110 bool RetrieveMetaTagContent(const WebFrame* main_frame, 104 bool RetrieveMetaTagContent(const WebFrame* main_frame,
111 const GURL& expected_url, 105 const GURL& expected_url,
112 const std::string& meta_tag_name, 106 const std::string& meta_tag_name,
113 bool* found_tag, 107 bool* found_tag,
114 std::string* meta_tag_content) { 108 std::string* meta_tag_content) {
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 148
155 } // namespace 149 } // namespace
156 150
157 ChromeRenderViewObserver::ChromeRenderViewObserver( 151 ChromeRenderViewObserver::ChromeRenderViewObserver(
158 content::RenderView* render_view, 152 content::RenderView* render_view,
159 ChromeRenderProcessObserver* chrome_render_process_observer) 153 ChromeRenderProcessObserver* chrome_render_process_observer)
160 : content::RenderViewObserver(render_view), 154 : content::RenderViewObserver(render_view),
161 chrome_render_process_observer_(chrome_render_process_observer), 155 chrome_render_process_observer_(chrome_render_process_observer),
162 translate_helper_(new TranslateHelper(render_view)), 156 translate_helper_(new TranslateHelper(render_view)),
163 phishing_classifier_(NULL), 157 phishing_classifier_(NULL),
164 last_indexed_page_id_(-1),
165 capture_timer_(false, false) { 158 capture_timer_(false, false) {
166 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); 159 const CommandLine& command_line = *CommandLine::ForCurrentProcess();
167 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) 160 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection))
168 OnSetClientSidePhishingDetection(true); 161 OnSetClientSidePhishingDetection(true);
169 } 162 }
170 163
171 ChromeRenderViewObserver::~ChromeRenderViewObserver() { 164 ChromeRenderViewObserver::~ChromeRenderViewObserver() {
172 } 165 }
173 166
174 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) { 167 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) {
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
346 Send(new ChromeViewHostMsg_PageHasOSDD( 339 Send(new ChromeViewHostMsg_PageHasOSDD(
347 routing_id(), main_frame->document().url(), osdd_url, 340 routing_id(), main_frame->document().url(), osdd_url,
348 search_provider::AUTODETECTED_PROVIDER)); 341 search_provider::AUTODETECTED_PROVIDER));
349 } 342 }
350 343
351 // Don't capture pages including refresh meta tag. 344 // Don't capture pages including refresh meta tag.
352 if (HasRefreshMetaTag(main_frame)) 345 if (HasRefreshMetaTag(main_frame))
353 return; 346 return;
354 347
355 CapturePageInfoLater( 348 CapturePageInfoLater(
356 render_view()->GetPageId(),
357 false, // preliminary_capture 349 false, // preliminary_capture
358 base::TimeDelta::FromMilliseconds( 350 base::TimeDelta::FromMilliseconds(
359 render_view()->GetContentStateImmediately() ? 351 render_view()->GetContentStateImmediately() ?
360 0 : kDelayForCaptureMs)); 352 0 : kDelayForCaptureMs));
361 } 353 }
362 354
363 void ChromeRenderViewObserver::DidCommitProvisionalLoad( 355 void ChromeRenderViewObserver::DidCommitProvisionalLoad(
364 WebLocalFrame* frame, bool is_new_navigation) { 356 WebLocalFrame* frame, bool is_new_navigation) {
365 // Don't capture pages that are not new navigations or that include a refresh meta tag. 357 // Don't capture pages that are not new navigations or that include a refresh meta tag.
366 if (!is_new_navigation || HasRefreshMetaTag(frame)) 358 if (!is_new_navigation || HasRefreshMetaTag(frame)) {
359 capture_timer_.Stop();
367 return; 360 return;
361 }
368 362
369 CapturePageInfoLater( 363 CapturePageInfoLater(
370 render_view()->GetPageId(),
371 true, // preliminary_capture 364 true, // preliminary_capture
372 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs)); 365 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs));
373 } 366 }
374 367
375 void ChromeRenderViewObserver::CapturePageInfoLater(int page_id, 368 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture,
376 bool preliminary_capture,
377 base::TimeDelta delay) { 369 base::TimeDelta delay) {
378 capture_timer_.Start( 370 capture_timer_.Start(
379 FROM_HERE, 371 FROM_HERE,
380 delay, 372 delay,
381 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, 373 base::Bind(&ChromeRenderViewObserver::CapturePageInfo,
382 base::Unretained(this), 374 base::Unretained(this),
383 page_id,
384 preliminary_capture)); 375 preliminary_capture));
385 } 376 }
386 377
387 void ChromeRenderViewObserver::CapturePageInfo(int page_id, 378 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) {
388 bool preliminary_capture) {
389 // If |page_id| is obsolete, we should stop indexing and capturing a page.
390 if (render_view()->GetPageId() != page_id)
391 return;
392
393 if (!render_view()->GetWebView()) 379 if (!render_view()->GetWebView())
394 return; 380 return;
395 381
396 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); 382 WebFrame* main_frame = render_view()->GetWebView()->mainFrame();
397 if (!main_frame) 383 if (!main_frame)
398 return; 384 return;
399 385
400 // Don't index/capture pages that are in view source mode. 386 // Don't index/capture pages that are in view source mode.
401 if (main_frame->isViewSourceModeEnabled()) 387 if (main_frame->isViewSourceModeEnabled())
402 return; 388 return;
(...skipping 13 matching lines...) Expand all
416 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the 402 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the
417 // translate helper for language detection and possible translation. 403 // translate helper for language detection and possible translation.
418 base::string16 contents; 404 base::string16 contents;
419 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); 405 base::TimeTicks capture_begin_time = base::TimeTicks::Now();
420 CaptureText(main_frame, &contents); 406 CaptureText(main_frame, &contents);
421 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, 407 UMA_HISTOGRAM_TIMES(kTranslateCaptureText,
422 base::TimeTicks::Now() - capture_begin_time); 408 base::TimeTicks::Now() - capture_begin_time);
423 if (translate_helper_) 409 if (translate_helper_)
424 translate_helper_->PageCaptured(contents); 410 translate_helper_->PageCaptured(contents);
425 411
426 // TODO(shess): Is indexing "Full text search" indexing? In that
427 // case more of this can go.
428 // Skip indexing if this is not a new load. Note that the case where
429 // page_id == last_indexed_page_id_ is more complicated, since we need to
430 // reindex if the toplevel URL has changed (such as from a redirect), even
431 // though this may not cause the page id to be incremented.
432 if (page_id < last_indexed_page_id_)
433 return;
434
435 bool same_page_id = last_indexed_page_id_ == page_id;
436 if (!preliminary_capture)
437 last_indexed_page_id_ = page_id;
438
439 // Get the URL for this page.
440 GURL url(main_frame->document().url());
441 if (url.is_empty()) {
442 if (!preliminary_capture)
443 last_indexed_url_ = GURL();
444 return;
445 }
446
447 // If the page id is unchanged, check whether the URL (ignoring fragments)
448 // has changed. If so, we need to reindex. Otherwise, assume this is a
449 // reload, in-page navigation, or some other load type where we don't want to
450 // reindex. Note: subframe navigations after onload increment the page id,
451 // so these will trigger a reindex.
452 GURL stripped_url(StripRef(url));
453 if (same_page_id && stripped_url == last_indexed_url_)
454 return;
455
456 if (!preliminary_capture)
457 last_indexed_url_ = stripped_url;
458
459 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); 412 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo");
460 413
461 #if defined(FULL_SAFE_BROWSING) 414 #if defined(FULL_SAFE_BROWSING)
462 // Will swap out the string. 415 // Will swap out the string.
463 if (phishing_classifier_) 416 if (phishing_classifier_)
464 phishing_classifier_->PageCaptured(&contents, preliminary_capture); 417 phishing_classifier_->PageCaptured(&contents, preliminary_capture);
465 #endif 418 #endif
466 } 419 }
467 420
468 void ChromeRenderViewObserver::CaptureText(WebFrame* frame, 421 void ChromeRenderViewObserver::CaptureText(WebFrame* frame,
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
514 WebElement element = node.to<WebElement>(); 467 WebElement element = node.to<WebElement>();
515 if (!element.hasHTMLTagName(tag_name)) 468 if (!element.hasHTMLTagName(tag_name))
516 continue; 469 continue;
517 WebString value = element.getAttribute(attribute_name); 470 WebString value = element.getAttribute(attribute_name);
518 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) 471 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh"))
519 continue; 472 continue;
520 return true; 473 return true;
521 } 474 }
522 return false; 475 return false;
523 } 476 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698