| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/features.h" | 5 #include "chrome/renderer/safe_browsing/features.h" |
| 6 | 6 |
| 7 #include "base/histogram.h" | 7 #include "base/histogram.h" |
| 8 #include "base/logging.h" | 8 #include "base/logging.h" |
| 9 | 9 |
| 10 namespace safe_browsing { | 10 namespace safe_browsing { |
| 11 | 11 |
| 12 const size_t FeatureMap::kMaxFeatureMapSize = 10000; | 12 const size_t FeatureMap::kMaxFeatureMapSize = 10000; |
| 13 | 13 |
| 14 FeatureMap::FeatureMap() {} | 14 FeatureMap::FeatureMap() {} |
| 15 FeatureMap::~FeatureMap() {} | 15 FeatureMap::~FeatureMap() {} |
| 16 | 16 |
| 17 bool FeatureMap::AddBooleanFeature(const std::string& name) { | 17 bool FeatureMap::AddBooleanFeature(const std::string& name) { |
| 18 return AddRealFeature(name, 1.0); |
| 19 } |
| 20 |
| 21 bool FeatureMap::AddRealFeature(const std::string& name, double value) { |
| 18 if (features_.size() >= kMaxFeatureMapSize) { | 22 if (features_.size() >= kMaxFeatureMapSize) { |
| 19 // If we hit this case, it indicates that either kMaxFeatureMapSize is | 23 // If we hit this case, it indicates that either kMaxFeatureMapSize is |
| 20 // too small, or there is a bug causing too many features to be added. | 24 // too small, or there is a bug causing too many features to be added. |
| 21 // In this case, we'll log to a histogram so we can see that this is | 25 // In this case, we'll log to a histogram so we can see that this is |
| 22 // happening, and make phishing classification fail silently. | 26 // happening, and make phishing classification fail silently. |
| 23 LOG(ERROR) << "Not adding feature: " << name << " because the " | 27 LOG(ERROR) << "Not adding feature: " << name << " because the " |
| 24 << "feature map is too large."; | 28 << "feature map is too large."; |
| 25 UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1); | 29 UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1); |
| 26 return false; | 30 return false; |
| 27 } | 31 } |
| 28 features_[name] = 1.0; | 32 // We only expect features in the range [0.0, 1.0], so fail if the feature is |
| 33 // outside this range. |
| 34 if (value < 0.0 || value > 1.0) { |
| 35 LOG(ERROR) << "Not adding feature: " << name << " because the value " |
| 36 << value << " is not in the range [0.0, 1.0]."; |
| 37 UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1); |
| 38 return false; |
| 39 } |
| 40 |
| 41 features_[name] = value; |
| 29 return true; | 42 return true; |
| 30 } | 43 } |
| 31 | 44 |
| 32 void FeatureMap::Clear() { | 45 void FeatureMap::Clear() { |
| 33 features_.clear(); | 46 features_.clear(); |
| 34 } | 47 } |
| 35 | 48 |
// String names of the individual features.
// NOTE(review): the names ending in '=' look like prefixes that get a token
// appended by the feature extractors -- confirm against the callers.
namespace features {

// ---- URL: host ----
const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress";
const char kUrlTldToken[] = "UrlTld=";
const char kUrlDomainToken[] = "UrlDomain=";
const char kUrlOtherHostToken[] = "UrlOtherHostToken=";

// ---- URL: host, aggregated ----
const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1";
const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3";

// ---- URL: path ----
const char kUrlPathToken[] = "UrlPathToken=";

// ---- DOM: HTML forms ----
const char kPageHasForms[] = "PageHasForms";
const char kPageActionOtherDomainFreq[] = "PageActionOtherDomainFreq";
const char kPageHasTextInputs[] = "PageHasTextInputs";
const char kPageHasPswdInputs[] = "PageHasPswdInputs";
const char kPageHasRadioInputs[] = "PageHasRadioInputs";
const char kPageHasCheckInputs[] = "PageHasCheckInputs";

// ---- DOM: HTML links ----
const char kPageExternalLinksFreq[] = "PageExternalLinksFreq";
const char kPageLinkDomain[] = "PageLinkDomain=";
const char kPageSecureLinksFreq[] = "PageSecureLinksFreq";

// ---- DOM: HTML scripts ----
const char kPageNumScriptTagsGTOne[] = "PageNumScriptTags>1";
const char kPageNumScriptTagsGTSix[] = "PageNumScriptTags>6";

// ---- DOM: everything else ----
const char kPageImgOtherDomainFreq[] = "PageImgOtherDomainFreq";

}  // namespace features
| 51 } // namespace safe_browsing | 84 } // namespace safe_browsing |
| OLD | NEW |