Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2512)

Unified Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 7793012: Change the client-side phishing detection hashing function to (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address Brian's comments Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/safe_browsing/browser_feature_extractor.cc
diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.cc b/chrome/browser/safe_browsing/browser_feature_extractor.cc
index 9cf4770b28bf8fd6109a86bf07ca89be8170bebf..69415a54517a12616ebe1381d1ba30bfb2815deb 100644
--- a/chrome/browser/safe_browsing/browser_feature_extractor.cc
+++ b/chrome/browser/safe_browsing/browser_feature_extractor.cc
@@ -9,6 +9,7 @@
#include "base/stl_util.h"
#include "base/stringprintf.h"
+#include "base/string_util.h"
#include "base/task.h"
#include "base/time.h"
#include "chrome/common/safe_browsing/csd.pb.h"
@@ -27,7 +28,7 @@
namespace safe_browsing {
-const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5;
+const int BrowserFeatureExtractor::kHashPrefixLength = 5;
BrowseInfo::BrowseInfo() {}
@@ -453,9 +454,27 @@ void BrowserFeatureExtractor::ComputeURLHash(
&host, &path, &query);
DCHECK(!host.empty()) << request->url();
DCHECK(!path.empty()) << request->url();
- request->set_suffix_prefix_hash(
- crypto::SHA256HashString(host + path).substr(
- 0, kSuffixPrefixHashLength));
+
+ // Lowercase the URL. Note: canonicalization converts the URL to ASCII.
+ // Percent encoded characters will not be lowercased but this is consistent
+ // with what we're doing on the server side.
+ StringToLowerASCII(&host);
+ StringToLowerASCII(&path);
+
+ // Remove leading 'www.' from the host.
+ if (host.size() > 4 && host.substr(0, 4) == "www.") {
+ host.erase(0, 4);
+ }
+ // Remove everything after the last '/' to broaden the pattern.
+ if (path.size() > 1 && *(path.rbegin()) != '/') {
+ // The pattern never ends in foo.com/test? because we stripped CGI params.
+ // Remove everything that comes after the last '/'.
+ size_t last_path = path.rfind("/");
+ path.erase(last_path + 1);
+ }
+
+ request->set_hash_prefix(crypto::SHA256HashString(host + path).substr(
+ 0, kHashPrefixLength));
}
}; // namespace safe_browsing

Powered by Google App Engine
This is Rietveld 408576698