Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(134)

Side by Side Diff: chrome/renderer/chrome_render_view_observer.cc

Issue 363293005: Remove page id from "page capture". (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/chrome_render_view_observer.h" 5 #include "chrome/renderer/chrome_render_view_observer.h"
6 6
7 #include "base/bind.h" 7 #include "base/bind.h"
8 #include "base/bind_helpers.h" 8 #include "base/bind_helpers.h"
9 #include "base/command_line.h" 9 #include "base/command_line.h"
10 #include "base/debug/trace_event.h" 10 #include "base/debug/trace_event.h"
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 89
90 // maximum number of characters in the document to index, any text beyond this 90 // maximum number of characters in the document to index, any text beyond this
91 // point will be clipped 91 // point will be clipped
92 static const size_t kMaxIndexChars = 65535; 92 static const size_t kMaxIndexChars = 65535;
93 93
94 // Constants for UMA statistic collection. 94 // Constants for UMA statistic collection.
95 static const char kTranslateCaptureText[] = "Translate.CaptureText"; 95 static const char kTranslateCaptureText[] = "Translate.CaptureText";
96 96
97 namespace { 97 namespace {
98 98
99 GURL StripRef(const GURL& url) {
100 GURL::Replacements replacements;
101 replacements.ClearRef();
102 return url.ReplaceComponents(replacements);
103 }
104
105 #if defined(OS_ANDROID) 99 #if defined(OS_ANDROID)
106 // Parses the DOM for a <meta> tag with a particular name. 100 // Parses the DOM for a <meta> tag with a particular name.
107 // |meta_tag_content| is set to the contents of the 'content' attribute. 101 // |meta_tag_content| is set to the contents of the 'content' attribute.
108 // |found_tag| is set to true if the tag was successfully found. 102 // |found_tag| is set to true if the tag was successfully found.
109 // Returns true if the document was parsed without errors. 103 // Returns true if the document was parsed without errors.
110 bool RetrieveMetaTagContent(const WebFrame* main_frame, 104 bool RetrieveMetaTagContent(const WebFrame* main_frame,
111 const GURL& expected_url, 105 const GURL& expected_url,
112 const std::string& meta_tag_name, 106 const std::string& meta_tag_name,
113 bool* found_tag, 107 bool* found_tag,
114 std::string* meta_tag_content) { 108 std::string* meta_tag_content) {
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 148
155 } // namespace 149 } // namespace
156 150
157 ChromeRenderViewObserver::ChromeRenderViewObserver( 151 ChromeRenderViewObserver::ChromeRenderViewObserver(
158 content::RenderView* render_view, 152 content::RenderView* render_view,
159 ChromeRenderProcessObserver* chrome_render_process_observer) 153 ChromeRenderProcessObserver* chrome_render_process_observer)
160 : content::RenderViewObserver(render_view), 154 : content::RenderViewObserver(render_view),
161 chrome_render_process_observer_(chrome_render_process_observer), 155 chrome_render_process_observer_(chrome_render_process_observer),
162 translate_helper_(new TranslateHelper(render_view)), 156 translate_helper_(new TranslateHelper(render_view)),
163 phishing_classifier_(NULL), 157 phishing_classifier_(NULL),
164 last_indexed_page_id_(-1),
165 capture_timer_(false, false) { 158 capture_timer_(false, false) {
166 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); 159 const CommandLine& command_line = *CommandLine::ForCurrentProcess();
167 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) 160 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection))
168 OnSetClientSidePhishingDetection(true); 161 OnSetClientSidePhishingDetection(true);
169 } 162 }
170 163
171 ChromeRenderViewObserver::~ChromeRenderViewObserver() { 164 ChromeRenderViewObserver::~ChromeRenderViewObserver() {
172 } 165 }
173 166
174 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) { 167 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) {
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
346 Send(new ChromeViewHostMsg_PageHasOSDD( 339 Send(new ChromeViewHostMsg_PageHasOSDD(
347 routing_id(), main_frame->document().url(), osdd_url, 340 routing_id(), main_frame->document().url(), osdd_url,
348 search_provider::AUTODETECTED_PROVIDER)); 341 search_provider::AUTODETECTED_PROVIDER));
349 } 342 }
350 343
351 // Don't capture pages including refresh meta tag. 344 // Don't capture pages including refresh meta tag.
352 if (HasRefreshMetaTag(main_frame)) 345 if (HasRefreshMetaTag(main_frame))
353 return; 346 return;
354 347
355 CapturePageInfoLater( 348 CapturePageInfoLater(
356 render_view()->GetPageId(),
357 false, // preliminary_capture 349 false, // preliminary_capture
358 base::TimeDelta::FromMilliseconds( 350 base::TimeDelta::FromMilliseconds(
359 render_view()->GetContentStateImmediately() ? 351 render_view()->GetContentStateImmediately() ?
360 0 : kDelayForCaptureMs)); 352 0 : kDelayForCaptureMs));
361 } 353 }
362 354
363 void ChromeRenderViewObserver::DidCommitProvisionalLoad( 355 void ChromeRenderViewObserver::DidCommitProvisionalLoad(
364 WebLocalFrame* frame, bool is_new_navigation) { 356 WebLocalFrame* frame, bool is_new_navigation) {
365 // Don't capture pages being not new, or including refresh meta tag. 357 // Don't capture pages being not new, or including refresh meta tag.
366 if (!is_new_navigation || HasRefreshMetaTag(frame)) 358 if (!is_new_navigation || HasRefreshMetaTag(frame))
367 return; 359 return;
368 360
369 CapturePageInfoLater( 361 CapturePageInfoLater(
370 render_view()->GetPageId(),
371 true, // preliminary_capture 362 true, // preliminary_capture
372 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs)); 363 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs));
373 } 364 }
374 365
375 void ChromeRenderViewObserver::CapturePageInfoLater(int page_id, 366 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture,
376 bool preliminary_capture,
377 base::TimeDelta delay) { 367 base::TimeDelta delay) {
378 capture_timer_.Start( 368 capture_timer_.Start(
379 FROM_HERE, 369 FROM_HERE,
380 delay, 370 delay,
381 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, 371 base::Bind(&ChromeRenderViewObserver::CapturePageInfo,
382 base::Unretained(this), 372 base::Unretained(this),
383 page_id,
384 preliminary_capture)); 373 preliminary_capture));
385 } 374 }
386 375
387 void ChromeRenderViewObserver::CapturePageInfo(int page_id, 376 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) {
388 bool preliminary_capture) {
389 // If |page_id| is obsolete, we should stop indexing and capturing a page.
390 if (render_view()->GetPageId() != page_id)
awong 2014/07/07 18:36:59 This looks like a guard against the RenderView loa…
Avi (use Gerrit) 2014/07/07 18:49:46 I'm not convinced this check for page id is even n…
Lei Zhang 2014/07/07 19:37:04 Can the page id change if |is_new_navigation| in D…
Avi (use Gerrit) 2014/07/07 19:51:22 I don't know. I can reset the timer no matter what…
391 return;
392
393 if (!render_view()->GetWebView()) 377 if (!render_view()->GetWebView())
394 return; 378 return;
395 379
396 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); 380 WebFrame* main_frame = render_view()->GetWebView()->mainFrame();
397 if (!main_frame) 381 if (!main_frame)
398 return; 382 return;
399 383
400 // Don't index/capture pages that are in view source mode. 384 // Don't index/capture pages that are in view source mode.
401 if (main_frame->isViewSourceModeEnabled()) 385 if (main_frame->isViewSourceModeEnabled())
402 return; 386 return;
(...skipping 13 matching lines...) Expand all
416 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the 400 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the
417 // translate helper for language detection and possible translation. 401 // translate helper for language detection and possible translation.
418 base::string16 contents; 402 base::string16 contents;
419 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); 403 base::TimeTicks capture_begin_time = base::TimeTicks::Now();
420 CaptureText(main_frame, &contents); 404 CaptureText(main_frame, &contents);
421 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, 405 UMA_HISTOGRAM_TIMES(kTranslateCaptureText,
422 base::TimeTicks::Now() - capture_begin_time); 406 base::TimeTicks::Now() - capture_begin_time);
423 if (translate_helper_) 407 if (translate_helper_)
424 translate_helper_->PageCaptured(contents); 408 translate_helper_->PageCaptured(contents);
425 409
426 // TODO(shess): Is indexing "Full text search" indexing? In that
427 // case more of this can go.
428 // Skip indexing if this is not a new load. Note that the case where
429 // page_id == last_indexed_page_id_ is more complicated, since we need to
430 // reindex if the toplevel URL has changed (such as from a redirect), even
431 // though this may not cause the page id to be incremented.
432 if (page_id < last_indexed_page_id_)
433 return;
434
435 bool same_page_id = last_indexed_page_id_ == page_id;
436 if (!preliminary_capture)
437 last_indexed_page_id_ = page_id;
438
439 // Get the URL for this page.
440 GURL url(main_frame->document().url());
441 if (url.is_empty()) {
442 if (!preliminary_capture)
443 last_indexed_url_ = GURL();
444 return;
445 }
446
447 // If the page id is unchanged, check whether the URL (ignoring fragments)
448 // has changed. If so, we need to reindex. Otherwise, assume this is a
449 // reload, in-page navigation, or some other load type where we don't want to
450 // reindex. Note: subframe navigations after onload increment the page id,
451 // so these will trigger a reindex.
452 GURL stripped_url(StripRef(url));
awong 2014/07/07 18:36:59 In this observer, the page_id seems to be used as…
Avi (use Gerrit) 2014/07/07 18:49:46 The classifier does check for PAGE_TRANSITION_FORW…
Lei Zhang 2014/07/07 19:37:04 The PAGE_TRANSITION_FORWARD_BACK was added in r833…
Avi (use Gerrit) 2014/07/07 19:51:22 So the phishing classifier runs an extra time if t…
453 if (same_page_id && stripped_url == last_indexed_url_)
454 return;
455
456 if (!preliminary_capture)
457 last_indexed_url_ = stripped_url;
458
459 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); 410 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo");
460 411
461 #if defined(FULL_SAFE_BROWSING) 412 #if defined(FULL_SAFE_BROWSING)
462 // Will swap out the string. 413 // Will swap out the string.
463 if (phishing_classifier_) 414 if (phishing_classifier_)
464 phishing_classifier_->PageCaptured(&contents, preliminary_capture); 415 phishing_classifier_->PageCaptured(&contents, preliminary_capture);
465 #endif 416 #endif
466 } 417 }
467 418
468 void ChromeRenderViewObserver::CaptureText(WebFrame* frame, 419 void ChromeRenderViewObserver::CaptureText(WebFrame* frame,
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
514 WebElement element = node.to<WebElement>(); 465 WebElement element = node.to<WebElement>();
515 if (!element.hasHTMLTagName(tag_name)) 466 if (!element.hasHTMLTagName(tag_name))
516 continue; 467 continue;
517 WebString value = element.getAttribute(attribute_name); 468 WebString value = element.getAttribute(attribute_name);
518 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) 469 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh"))
519 continue; 470 continue;
520 return true; 471 return true;
521 } 472 }
522 return false; 473 return false;
523 } 474 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698