Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(158)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc

Issue 15140003: Add support for split Public Suffix List distinctions. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebased again Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" 5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
6 6
7 #include "base/bind.h" 7 #include "base/bind.h"
8 #include "base/compiler_specific.h" 8 #include "base/compiler_specific.h"
9 #include "base/hash_tables.h" 9 #include "base/hash_tables.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
(...skipping 368 matching lines...) Expand 10 before | Expand all | Expand 10 after
379 cur_document_.reset(); 379 cur_document_.reset();
380 } 380 }
381 381
382 void PhishingDOMFeatureExtractor::ResetFrameData() { 382 void PhishingDOMFeatureExtractor::ResetFrameData() {
383 DCHECK(!cur_document_.isNull()); 383 DCHECK(!cur_document_.isNull());
384 DCHECK(!cur_frame_data_.get()); 384 DCHECK(!cur_frame_data_.get());
385 385
386 cur_frame_data_.reset(new FrameData()); 386 cur_frame_data_.reset(new FrameData());
387 cur_frame_data_->elements = cur_document_.all(); 387 cur_frame_data_->elements = cur_document_.all();
388 cur_frame_data_->domain = 388 cur_frame_data_->domain =
389 net::RegistryControlledDomainService::GetDomainAndRegistry( 389 net::registry_controlled_domains::GetDomainAndRegistry(
390 cur_document_.url()); 390 cur_document_.url(),
391 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
391 } 392 }
392 393
393 WebKit::WebDocument PhishingDOMFeatureExtractor::GetNextDocument() { 394 WebKit::WebDocument PhishingDOMFeatureExtractor::GetNextDocument() {
394 DCHECK(!cur_document_.isNull()); 395 DCHECK(!cur_document_.isNull());
395 WebKit::WebFrame* frame = cur_document_.frame(); 396 WebKit::WebFrame* frame = cur_document_.frame();
396 // Advance to the next frame that contains a document, with no wrapping. 397 // Advance to the next frame that contains a document, with no wrapping.
397 if (frame) { 398 if (frame) {
398 while ((frame = frame->traverseNext(false))) { 399 while ((frame = frame->traverseNext(false))) {
399 if (!frame->document().isNull()) { 400 if (!frame->document().isNull()) {
400 return frame->document(); 401 return frame->document();
(...skipping 14 matching lines...) Expand all
415 416
416 if (cur_frame_data_->domain.empty()) { 417 if (cur_frame_data_->domain.empty()) {
417 return false; 418 return false;
418 } 419 }
419 420
420 // TODO(bryner): Ensure that the url encoding is consistent with the features 421 // TODO(bryner): Ensure that the url encoding is consistent with the features
421 // in the model. 422 // in the model.
422 if (url.HostIsIPAddress()) { 423 if (url.HostIsIPAddress()) {
423 domain->assign(url.host()); 424 domain->assign(url.host());
424 } else { 425 } else {
425 domain->assign(net::RegistryControlledDomainService::GetDomainAndRegistry( 426 domain->assign(net::registry_controlled_domains::GetDomainAndRegistry(
426 url)); 427 url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES));
427 } 428 }
428 429
429 return !domain->empty() && *domain != cur_frame_data_->domain; 430 return !domain->empty() && *domain != cur_frame_data_->domain;
430 } 431 }
431 432
432 void PhishingDOMFeatureExtractor::InsertFeatures() { 433 void PhishingDOMFeatureExtractor::InsertFeatures() {
433 DCHECK(page_feature_state_.get()); 434 DCHECK(page_feature_state_.get());
434 435
435 if (page_feature_state_->total_links > 0) { 436 if (page_feature_state_->total_links > 0) {
436 // Add a feature for the fraction of times the page links to an external 437 // Add a feature for the fraction of times the page links to an external
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
490 // Record number of script tags (discretized for numerical stability.) 491 // Record number of script tags (discretized for numerical stability.)
491 if (page_feature_state_->num_script_tags > 1) { 492 if (page_feature_state_->num_script_tags > 1) {
492 features_->AddBooleanFeature(features::kPageNumScriptTagsGTOne); 493 features_->AddBooleanFeature(features::kPageNumScriptTagsGTOne);
493 if (page_feature_state_->num_script_tags > 6) { 494 if (page_feature_state_->num_script_tags > 6) {
494 features_->AddBooleanFeature(features::kPageNumScriptTagsGTSix); 495 features_->AddBooleanFeature(features::kPageNumScriptTagsGTSix);
495 } 496 }
496 } 497 }
497 } 498 }
498 499
499 } // namespace safe_browsing 500 } // namespace safe_browsing
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698