Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(212)

Side by Side Diff: chrome/renderer/safe_browsing/features.cc

Issue 2878046: Add an extractor for DOM features to be used for client side phishing detection. (Closed)
Patch Set: address marria's comments Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/features.h" 5 #include "chrome/renderer/safe_browsing/features.h"
6 6
7 #include "base/histogram.h" 7 #include "base/histogram.h"
8 #include "base/logging.h" 8 #include "base/logging.h"
9 9
10 namespace safe_browsing { 10 namespace safe_browsing {
11 11
12 const size_t FeatureMap::kMaxFeatureMapSize = 10000; 12 const size_t FeatureMap::kMaxFeatureMapSize = 10000;
13 13
14 FeatureMap::FeatureMap() {} 14 FeatureMap::FeatureMap() {}
15 FeatureMap::~FeatureMap() {} 15 FeatureMap::~FeatureMap() {}
16 16
17 bool FeatureMap::AddBooleanFeature(const std::string& name) { 17 bool FeatureMap::AddBooleanFeature(const std::string& name) {
18 return AddRealFeature(name, 1.0);
19 }
20
21 bool FeatureMap::AddRealFeature(const std::string& name, double value) {
18 if (features_.size() >= kMaxFeatureMapSize) { 22 if (features_.size() >= kMaxFeatureMapSize) {
19 // If we hit this case, it indicates that either kMaxFeatureMapSize is 23 // If we hit this case, it indicates that either kMaxFeatureMapSize is
20 // too small, or there is a bug causing too many features to be added. 24 // too small, or there is a bug causing too many features to be added.
21 // In this case, we'll log to a histogram so we can see that this is 25 // In this case, we'll log to a histogram so we can see that this is
22 // happening, and make phishing classification fail silently. 26 // happening, and make phishing classification fail silently.
23 LOG(ERROR) << "Not adding feature: " << name << " because the " 27 LOG(ERROR) << "Not adding feature: " << name << " because the "
24 << "feature map is too large."; 28 << "feature map is too large.";
25 UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1); 29 UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1);
26 return false; 30 return false;
27 } 31 }
28 features_[name] = 1.0; 32 // We only expect features in the range [0.0, 1.0], so fail if the feature is
33 // outside this range.
34 if (value < 0.0 || value > 1.0) {
35 LOG(ERROR) << "Not adding feature: " << name << " because the value "
36 << value << " is not in the range [0.0, 1.0].";
37 UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1);
38 return false;
39 }
40
41 features_[name] = value;
29 return true; 42 return true;
30 } 43 }
31 44
32 void FeatureMap::Clear() { 45 void FeatureMap::Clear() {
33 features_.clear(); 46 features_.clear();
34 } 47 }
35 48
36 namespace features { 49 namespace features {
37 // URL host features 50 // URL host features
38 const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress"; 51 const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress";
39 const char kUrlTldToken[] = "UrlTld="; 52 const char kUrlTldToken[] = "UrlTld=";
40 const char kUrlDomainToken[] = "UrlDomain="; 53 const char kUrlDomainToken[] = "UrlDomain=";
41 const char kUrlOtherHostToken[] = "UrlOtherHostToken="; 54 const char kUrlOtherHostToken[] = "UrlOtherHostToken=";
42 55
43 // URL host aggregate features 56 // URL host aggregate features
44 const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1"; 57 const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1";
45 const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3"; 58 const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3";
46 59
47 // URL path features 60 // URL path features
48 const char kUrlPathToken[] = "UrlPathToken="; 61 const char kUrlPathToken[] = "UrlPathToken=";
49 62
63 // DOM HTML form features
64 const char kPageHasForms[] = "PageHasForms";
65 const char kPageActionOtherDomainFreq[] = "PageActionOtherDomainFreq";
66 const char kPageHasTextInputs[] = "PageHasTextInputs";
67 const char kPageHasPswdInputs[] = "PageHasPswdInputs";
68 const char kPageHasRadioInputs[] = "PageHasRadioInputs";
69 const char kPageHasCheckInputs[] = "PageHasCheckInputs";
70
71 // DOM HTML link features
72 const char kPageExternalLinksFreq[] = "PageExternalLinksFreq";
73 const char kPageLinkDomain[] = "PageLinkDomain=";
74 const char kPageSecureLinksFreq[] = "PageSecureLinksFreq";
75
76 // DOM HTML script features
77 const char kPageNumScriptTagsGTOne[] = "PageNumScriptTags>1";
78 const char kPageNumScriptTagsGTSix[] = "PageNumScriptTags>6";
79
80 // Other DOM HTML features
81 const char kPageImgOtherDomainFreq[] = "PageImgOtherDomainFreq";
82
50 } // namespace features 83 } // namespace features
51 } // namespace safe_browsing 84 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « chrome/renderer/safe_browsing/features.h ('k') | chrome/renderer/safe_browsing/features_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698