chrome/renderer/chrome_render_view_observer.cc - Issue 363293005: Remove page id from "page capture".

Side by Side Diff: chrome/renderer/chrome_render_view_observer.cc

Issue 363293005: Remove page id from "page capture". (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: fixes Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « chrome/renderer/chrome_render_view_observer.h ('k') | chrome/renderer/safe_browsing/phishing_classifier_delegate.cc » ('j') | chrome/renderer/safe_browsing/phishing_classifier_delegate.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/renderer/chrome_render_view_observer.h"	5 #include "chrome/renderer/chrome_render_view_observer.h"

6	6

7 #include "base/bind.h"	7 #include "base/bind.h"

8 #include "base/bind_helpers.h"	8 #include "base/bind_helpers.h"

9 #include "base/command_line.h"	9 #include "base/command_line.h"

10 #include "base/debug/trace_event.h"	10 #include "base/debug/trace_event.h"

(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
89	89

90 // maximum number of characters in the document to index, any text beyond this	90 // maximum number of characters in the document to index, any text beyond this

91 // point will be clipped	91 // point will be clipped

92 static const size_t kMaxIndexChars = 65535;	92 static const size_t kMaxIndexChars = 65535;

93	93

94 // Constants for UMA statistic collection.	94 // Constants for UMA statistic collection.

95 static const char kTranslateCaptureText[] = "Translate.CaptureText";	95 static const char kTranslateCaptureText[] = "Translate.CaptureText";

96	96

97 namespace {	97 namespace {

98	98

99 GURL StripRef(const GURL& url) {

100 GURL::Replacements replacements;

101 replacements.ClearRef();

102 return url.ReplaceComponents(replacements);

103 }

104

105 #if defined(OS_ANDROID)	99 #if defined(OS_ANDROID)

106 // Parses the DOM for a <meta> tag with a particular name.	100 // Parses the DOM for a <meta> tag with a particular name.

107 // |meta_tag_content| is set to the contents of the 'content' attribute.	101 // |meta_tag_content| is set to the contents of the 'content' attribute.

108 // |found_tag| is set to true if the tag was successfully found.	102 // |found_tag| is set to true if the tag was successfully found.

109 // Returns true if the document was parsed without errors.	103 // Returns true if the document was parsed without errors.

110 bool RetrieveMetaTagContent(const WebFrame* main_frame,	104 bool RetrieveMetaTagContent(const WebFrame* main_frame,

111 const GURL& expected_url,	105 const GURL& expected_url,

112 const std::string& meta_tag_name,	106 const std::string& meta_tag_name,

113 bool* found_tag,	107 bool* found_tag,

114 std::string* meta_tag_content) {	108 std::string* meta_tag_content) {

(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
154	148

155 } // namespace	149 } // namespace

156	150

157 ChromeRenderViewObserver::ChromeRenderViewObserver(	151 ChromeRenderViewObserver::ChromeRenderViewObserver(

158 content::RenderView* render_view,	152 content::RenderView* render_view,

159 ChromeRenderProcessObserver* chrome_render_process_observer)	153 ChromeRenderProcessObserver* chrome_render_process_observer)

160 : content::RenderViewObserver(render_view),	154 : content::RenderViewObserver(render_view),

161 chrome_render_process_observer_(chrome_render_process_observer),	155 chrome_render_process_observer_(chrome_render_process_observer),

162 translate_helper_(new TranslateHelper(render_view)),	156 translate_helper_(new TranslateHelper(render_view)),

163 phishing_classifier_(NULL),	157 phishing_classifier_(NULL),

164 last_indexed_page_id_(-1),

165 capture_timer_(false, false) {	158 capture_timer_(false, false) {

166 const CommandLine& command_line = *CommandLine::ForCurrentProcess();	159 const CommandLine& command_line = *CommandLine::ForCurrentProcess();

167 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection))	160 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection))

168 OnSetClientSidePhishingDetection(true);	161 OnSetClientSidePhishingDetection(true);

169 }	162 }

170	163

171 ChromeRenderViewObserver::~ChromeRenderViewObserver() {	164 ChromeRenderViewObserver::~ChromeRenderViewObserver() {

172 }	165 }

173	166

174 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) {	167 bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) {

(...skipping 171 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
346 Send(new ChromeViewHostMsg_PageHasOSDD(	339 Send(new ChromeViewHostMsg_PageHasOSDD(

347 routing_id(), main_frame->document().url(), osdd_url,	340 routing_id(), main_frame->document().url(), osdd_url,

348 search_provider::AUTODETECTED_PROVIDER));	341 search_provider::AUTODETECTED_PROVIDER));

349 }	342 }

350	343

351 // Don't capture pages including refresh meta tag.	344 // Don't capture pages including refresh meta tag.

352 if (HasRefreshMetaTag(main_frame))	345 if (HasRefreshMetaTag(main_frame))

353 return;	346 return;

354	347

355 CapturePageInfoLater(	348 CapturePageInfoLater(

356 render_view()->GetPageId(),

357 false, // preliminary_capture	349 false, // preliminary_capture

358 base::TimeDelta::FromMilliseconds(	350 base::TimeDelta::FromMilliseconds(

359 render_view()->GetContentStateImmediately() ?	351 render_view()->GetContentStateImmediately() ?

360 0 : kDelayForCaptureMs));	352 0 : kDelayForCaptureMs));

361 }	353 }

362	354

363 void ChromeRenderViewObserver::DidCommitProvisionalLoad(	355 void ChromeRenderViewObserver::DidCommitProvisionalLoad(

364 WebLocalFrame* frame, bool is_new_navigation) {	356 WebLocalFrame* frame, bool is_new_navigation) {

365 // Don't capture pages being not new, or including refresh meta tag.	357 // Don't capture pages being not new, or including refresh meta tag.

366 if (!is_new_navigation || HasRefreshMetaTag(frame))	358 if (!is_new_navigation || HasRefreshMetaTag(frame)) {

	359 capture_timer_.Stop();

367 return;	360 return;

	361 }

368	362

369 CapturePageInfoLater(	363 CapturePageInfoLater(

370 render_view()->GetPageId(),

371 true, // preliminary_capture	364 true, // preliminary_capture

372 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs));	365 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs));

373 }	366 }

374	367

375 void ChromeRenderViewObserver::CapturePageInfoLater(int page_id,	368 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture,

376 bool preliminary_capture,

377 base::TimeDelta delay) {	369 base::TimeDelta delay) {

378 capture_timer_.Start(	370 capture_timer_.Start(

379 FROM_HERE,	371 FROM_HERE,

380 delay,	372 delay,

381 base::Bind(&ChromeRenderViewObserver::CapturePageInfo,	373 base::Bind(&ChromeRenderViewObserver::CapturePageInfo,

382 base::Unretained(this),	374 base::Unretained(this),

383 page_id,

384 preliminary_capture));	375 preliminary_capture));

385 }	376 }

386	377

387 void ChromeRenderViewObserver::CapturePageInfo(int page_id,	378 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) {

388 bool preliminary_capture) {

389 // If |page_id| is obsolete, we should stop indexing and capturing a page.

390 if (render_view()->GetPageId() != page_id)

391 return;

392

393 if (!render_view()->GetWebView())	379 if (!render_view()->GetWebView())

394 return;	380 return;

395	381

396 WebFrame* main_frame = render_view()->GetWebView()->mainFrame();	382 WebFrame* main_frame = render_view()->GetWebView()->mainFrame();

397 if (!main_frame)	383 if (!main_frame)

398 return;	384 return;

399	385

400 // Don't index/capture pages that are in view source mode.	386 // Don't index/capture pages that are in view source mode.

401 if (main_frame->isViewSourceModeEnabled())	387 if (main_frame->isViewSourceModeEnabled())

402 return;	388 return;

(...skipping 13 matching lines...) Expand all Loading...
416 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the	402 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the

417 // translate helper for language detection and possible translation.	403 // translate helper for language detection and possible translation.

418 base::string16 contents;	404 base::string16 contents;

419 base::TimeTicks capture_begin_time = base::TimeTicks::Now();	405 base::TimeTicks capture_begin_time = base::TimeTicks::Now();

420 CaptureText(main_frame, &contents);	406 CaptureText(main_frame, &contents);

421 UMA_HISTOGRAM_TIMES(kTranslateCaptureText,	407 UMA_HISTOGRAM_TIMES(kTranslateCaptureText,

422 base::TimeTicks::Now() - capture_begin_time);	408 base::TimeTicks::Now() - capture_begin_time);

423 if (translate_helper_)	409 if (translate_helper_)

424 translate_helper_->PageCaptured(contents);	410 translate_helper_->PageCaptured(contents);

425	411

426 // TODO(shess): Is indexing "Full text search" indexing? In that

427 // case more of this can go.

428 // Skip indexing if this is not a new load. Note that the case where

429 // page_id == last_indexed_page_id_ is more complicated, since we need to

430 // reindex if the toplevel URL has changed (such as from a redirect), even

431 // though this may not cause the page id to be incremented.

432 if (page_id < last_indexed_page_id_)

433 return;

434

435 bool same_page_id = last_indexed_page_id_ == page_id;

436 if (!preliminary_capture)

437 last_indexed_page_id_ = page_id;

438

439 // Get the URL for this page.

440 GURL url(main_frame->document().url());

441 if (url.is_empty()) {

442 if (!preliminary_capture)

443 last_indexed_url_ = GURL();

444 return;

445 }

446

447 // If the page id is unchanged, check whether the URL (ignoring fragments)

448 // has changed. If so, we need to reindex. Otherwise, assume this is a

449 // reload, in-page navigation, or some other load type where we don't want to

450 // reindex. Note: subframe navigations after onload increment the page id,

451 // so these will trigger a reindex.

452 GURL stripped_url(StripRef(url));

453 if (same_page_id && stripped_url == last_indexed_url_)

454 return;

455

456 if (!preliminary_capture)

457 last_indexed_url_ = stripped_url;

458

459 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo");	412 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo");

460	413

461 #if defined(FULL_SAFE_BROWSING)	414 #if defined(FULL_SAFE_BROWSING)

462 // Will swap out the string.	415 // Will swap out the string.

463 if (phishing_classifier_)	416 if (phishing_classifier_)

464 phishing_classifier_->PageCaptured(&contents, preliminary_capture);	417 phishing_classifier_->PageCaptured(&contents, preliminary_capture);

465 #endif	418 #endif

466 }	419 }

467	420

468 void ChromeRenderViewObserver::CaptureText(WebFrame* frame,	421 void ChromeRenderViewObserver::CaptureText(WebFrame* frame,

(...skipping 45 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
514 WebElement element = node.to<WebElement>();	467 WebElement element = node.to<WebElement>();

515 if (!element.hasHTMLTagName(tag_name))	468 if (!element.hasHTMLTagName(tag_name))

516 continue;	469 continue;

517 WebString value = element.getAttribute(attribute_name);	470 WebString value = element.getAttribute(attribute_name);

518 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh"))	471 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh"))

519 continue;	472 continue;

520 return true;	473 return true;

521 }	474 }

522 return false;	475 return false;

523 }	476 }

OLD	NEW