Chromium Code Reviews| Index: chrome/browser/safe_browsing/browser_feature_extractor.cc |
| diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.cc b/chrome/browser/safe_browsing/browser_feature_extractor.cc |
| index 9cf4770b28bf8fd6109a86bf07ca89be8170bebf..e706a636b293601e2422b242286ca619aa43c664 100644 |
| --- a/chrome/browser/safe_browsing/browser_feature_extractor.cc |
| +++ b/chrome/browser/safe_browsing/browser_feature_extractor.cc |
| @@ -9,6 +9,7 @@ |
| #include "base/stl_util.h" |
| #include "base/stringprintf.h" |
| +#include "base/string_util.h" |
| #include "base/task.h" |
| #include "base/time.h" |
| #include "chrome/common/safe_browsing/csd.pb.h" |
| @@ -27,7 +28,7 @@ |
| namespace safe_browsing { |
| -const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5; |
| +const int BrowserFeatureExtractor::kHashPrefixLength = 5; |
| BrowseInfo::BrowseInfo() {} |
| @@ -453,9 +454,27 @@ void BrowserFeatureExtractor::ComputeURLHash( |
| &host, &path, &query); |
| DCHECK(!host.empty()) << request->url(); |
| DCHECK(!path.empty()) << request->url(); |
| - request->set_suffix_prefix_hash( |
| - crypto::SHA256HashString(host + path).substr( |
| - 0, kSuffixPrefixHashLength)); |
| + |
| + // Lowercase the URL. Note: canonicalization converts the URL to ASCII.
|
Brian Ryner
2011/08/30 00:35:10
canoncalization -> canonicalization
noelutz
2011/08/30 00:39:52
Done.
|
| + // Percent encoded characters will not be lowercased but this is consistent |
| + // with what we're doing on the server side. |
| + StringToLowerASCII(&host); |
| + StringToLowerASCII(&path); |
| + |
| + // Remove leading 'www.' from the host. |
| + if (host.size() > 4 && host.substr(0, 4) == "www.") { |
| + host.erase(0, 4); |
| + } |
| + // If the path has more than one character and does not end with '/', trim it back to its last '/'.
|
Brian Ryner
2011/08/30 00:35:10
I think this comment came from some server-side lo
noelutz
2011/08/30 00:39:52
Done.
|
| + if (path.size() > 1 && *(path.rbegin()) != '/') { |
| + // The path never ends in foo.com/test? because we stripped CGI params.
| + // Remove everything that comes after the last '/'. |
| + size_t last_path = path.rfind("/"); |
| + path.erase(last_path + 1); |
| + } |
| + |
| + request->set_hash_prefix(crypto::SHA256HashString(host + path).substr( |
| + 0, kHashPrefixLength)); |
| } |
| }; // namespace safe_browsing |