OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" | 5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
9 #include "base/hash_tables.h" | 9 #include "base/hash_tables.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
(...skipping 369 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
380 } | 380 } |
381 | 381 |
// Replaces the per-frame state in cur_frame_data_ with a fresh FrameData
// built from cur_document_: the document's full element collection and the
// domain computed from the document's URL.
//
// Preconditions (DCHECKed): cur_document_ is non-null and cur_frame_data_ is
// currently unset.
//
// NOTE(review): this is a side-by-side diff; the right-hand (NEW) column
// differs from the left only in passing net::RCDS::EXCLUDE_PRIVATE_REGISTRIES
// as a second argument to GetDomainAndRegistry. Presumably this keeps
// privately registered suffixes out of the registry portion -- confirm
// against the net:: API.
382 void PhishingDOMFeatureExtractor::ResetFrameData() { | 382 void PhishingDOMFeatureExtractor::ResetFrameData() { |
383 DCHECK(!cur_document_.isNull()); | 383 DCHECK(!cur_document_.isNull()); |
384 DCHECK(!cur_frame_data_.get()); | 384 DCHECK(!cur_frame_data_.get()); |
385 | 385 |
// Allocate the new frame state, then populate it from the current document.
386 cur_frame_data_.reset(new FrameData()); | 386 cur_frame_data_.reset(new FrameData()); |
387 cur_frame_data_->elements = cur_document_.all(); | 387 cur_frame_data_->elements = cur_document_.all(); |
// The stored domain is what other code in this file compares URL domains
// against (it treats an empty stored domain as "cannot compare").
388 cur_frame_data_->domain = | 388 cur_frame_data_->domain = |
389 net::RegistryControlledDomainService::GetDomainAndRegistry( | 389 net::RegistryControlledDomainService::GetDomainAndRegistry( |
390 cur_document_.url()); | 390 cur_document_.url(), net::RCDS::EXCLUDE_PRIVATE_REGISTRIES); |
391 } | 391 } |
392 | 392 |
393 WebKit::WebDocument PhishingDOMFeatureExtractor::GetNextDocument() { | 393 WebKit::WebDocument PhishingDOMFeatureExtractor::GetNextDocument() { |
394 DCHECK(!cur_document_.isNull()); | 394 DCHECK(!cur_document_.isNull()); |
395 WebKit::WebFrame* frame = cur_document_.frame(); | 395 WebKit::WebFrame* frame = cur_document_.frame(); |
396 // Advance to the next frame that contains a document, with no wrapping. | 396 // Advance to the next frame that contains a document, with no wrapping. |
397 if (frame) { | 397 if (frame) { |
398 while ((frame = frame->traverseNext(false))) { | 398 while ((frame = frame->traverseNext(false))) { |
399 if (!frame->document().isNull()) { | 399 if (!frame->document().isNull()) { |
400 return frame->document(); | 400 return frame->document(); |
(...skipping 15 matching lines...) Expand all Loading... |
416 if (cur_frame_data_->domain.empty()) { | 416 if (cur_frame_data_->domain.empty()) { |
417 return false; | 417 return false; |
418 } | 418 } |
419 | 419 |
420 // TODO(bryner): Ensure that the url encoding is consistent with the features | 420 // TODO(bryner): Ensure that the url encoding is consistent with the features |
421 // in the model. | 421 // in the model. |
422 if (url.HostIsIPAddress()) { | 422 if (url.HostIsIPAddress()) { |
423 domain->assign(url.host()); | 423 domain->assign(url.host()); |
424 } else { | 424 } else { |
425 domain->assign(net::RegistryControlledDomainService::GetDomainAndRegistry( | 425 domain->assign(net::RegistryControlledDomainService::GetDomainAndRegistry( |
426 url)); | 426 url, net::RCDS::EXCLUDE_PRIVATE_REGISTRIES)); |
427 } | 427 } |
428 | 428 |
429 return !domain->empty() && *domain != cur_frame_data_->domain; | 429 return !domain->empty() && *domain != cur_frame_data_->domain; |
430 } | 430 } |
431 | 431 |
432 void PhishingDOMFeatureExtractor::InsertFeatures() { | 432 void PhishingDOMFeatureExtractor::InsertFeatures() { |
433 DCHECK(page_feature_state_.get()); | 433 DCHECK(page_feature_state_.get()); |
434 | 434 |
435 if (page_feature_state_->total_links > 0) { | 435 if (page_feature_state_->total_links > 0) { |
436 // Add a feature for the fraction of times the page links to an external | 436 // Add a feature for the fraction of times the page links to an external |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
490 // Record number of script tags (discretized for numerical stability.) | 490 // Record number of script tags (discretized for numerical stability.) |
491 if (page_feature_state_->num_script_tags > 1) { | 491 if (page_feature_state_->num_script_tags > 1) { |
492 features_->AddBooleanFeature(features::kPageNumScriptTagsGTOne); | 492 features_->AddBooleanFeature(features::kPageNumScriptTagsGTOne); |
493 if (page_feature_state_->num_script_tags > 6) { | 493 if (page_feature_state_->num_script_tags > 6) { |
494 features_->AddBooleanFeature(features::kPageNumScriptTagsGTSix); | 494 features_->AddBooleanFeature(features::kPageNumScriptTagsGTSix); |
495 } | 495 } |
496 } | 496 } |
497 } | 497 } |
498 | 498 |
499 } // namespace safe_browsing | 499 } // namespace safe_browsing |
OLD | NEW |