| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" |
| 6 | 6 |
| 7 #include "base/bind.h" | 7 #include "base/bind.h" |
| 8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
| 9 #include "base/hash_tables.h" | 9 #include "base/hash_tables.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| (...skipping 369 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 380 } | 380 } |
| 381 | 381 |
| 382 void PhishingDOMFeatureExtractor::ResetFrameData() { | 382 void PhishingDOMFeatureExtractor::ResetFrameData() { |
| 383 DCHECK(!cur_document_.isNull()); | 383 DCHECK(!cur_document_.isNull()); |
| 384 DCHECK(!cur_frame_data_.get()); | 384 DCHECK(!cur_frame_data_.get()); |
| 385 | 385 |
| 386 cur_frame_data_.reset(new FrameData()); | 386 cur_frame_data_.reset(new FrameData()); |
| 387 cur_frame_data_->elements = cur_document_.all(); | 387 cur_frame_data_->elements = cur_document_.all(); |
| 388 cur_frame_data_->domain = | 388 cur_frame_data_->domain = |
| 389 net::RegistryControlledDomainService::GetDomainAndRegistry( | 389 net::RegistryControlledDomainService::GetDomainAndRegistry( |
| 390 cur_document_.url()); | 390 cur_document_.url(), net::RCDS::EXCLUDE_PRIVATE_REGISTRIES); |
| 391 } | 391 } |
| 392 | 392 |
| 393 WebKit::WebDocument PhishingDOMFeatureExtractor::GetNextDocument() { | 393 WebKit::WebDocument PhishingDOMFeatureExtractor::GetNextDocument() { |
| 394 DCHECK(!cur_document_.isNull()); | 394 DCHECK(!cur_document_.isNull()); |
| 395 WebKit::WebFrame* frame = cur_document_.frame(); | 395 WebKit::WebFrame* frame = cur_document_.frame(); |
| 396 // Advance to the next frame that contains a document, with no wrapping. | 396 // Advance to the next frame that contains a document, with no wrapping. |
| 397 if (frame) { | 397 if (frame) { |
| 398 while ((frame = frame->traverseNext(false))) { | 398 while ((frame = frame->traverseNext(false))) { |
| 399 if (!frame->document().isNull()) { | 399 if (!frame->document().isNull()) { |
| 400 return frame->document(); | 400 return frame->document(); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 416 if (cur_frame_data_->domain.empty()) { | 416 if (cur_frame_data_->domain.empty()) { |
| 417 return false; | 417 return false; |
| 418 } | 418 } |
| 419 | 419 |
| 420 // TODO(bryner): Ensure that the url encoding is consistent with the features | 420 // TODO(bryner): Ensure that the url encoding is consistent with the features |
| 421 // in the model. | 421 // in the model. |
| 422 if (url.HostIsIPAddress()) { | 422 if (url.HostIsIPAddress()) { |
| 423 domain->assign(url.host()); | 423 domain->assign(url.host()); |
| 424 } else { | 424 } else { |
| 425 domain->assign(net::RegistryControlledDomainService::GetDomainAndRegistry( | 425 domain->assign(net::RegistryControlledDomainService::GetDomainAndRegistry( |
| 426 url)); | 426 url, net::RCDS::EXCLUDE_PRIVATE_REGISTRIES)); |
| 427 } | 427 } |
| 428 | 428 |
| 429 return !domain->empty() && *domain != cur_frame_data_->domain; | 429 return !domain->empty() && *domain != cur_frame_data_->domain; |
| 430 } | 430 } |
| 431 | 431 |
| 432 void PhishingDOMFeatureExtractor::InsertFeatures() { | 432 void PhishingDOMFeatureExtractor::InsertFeatures() { |
| 433 DCHECK(page_feature_state_.get()); | 433 DCHECK(page_feature_state_.get()); |
| 434 | 434 |
| 435 if (page_feature_state_->total_links > 0) { | 435 if (page_feature_state_->total_links > 0) { |
| 436 // Add a feature for the fraction of times the page links to an external | 436 // Add a feature for the fraction of times the page links to an external |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 490 // Record number of script tags (discretized for numerical stability.) | 490 // Record number of script tags (discretized for numerical stability.) |
| 491 if (page_feature_state_->num_script_tags > 1) { | 491 if (page_feature_state_->num_script_tags > 1) { |
| 492 features_->AddBooleanFeature(features::kPageNumScriptTagsGTOne); | 492 features_->AddBooleanFeature(features::kPageNumScriptTagsGTOne); |
| 493 if (page_feature_state_->num_script_tags > 6) { | 493 if (page_feature_state_->num_script_tags > 6) { |
| 494 features_->AddBooleanFeature(features::kPageNumScriptTagsGTSix); | 494 features_->AddBooleanFeature(features::kPageNumScriptTagsGTSix); |
| 495 } | 495 } |
| 496 } | 496 } |
| 497 } | 497 } |
| 498 | 498 |
| 499 } // namespace safe_browsing | 499 } // namespace safe_browsing |
| OLD | NEW |