chrome/browser/safe_browsing/browser_feature_extractor.cc - Issue 7793012: Change the client-side phishing detection hashing function to

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 7793012: Change the client-side phishing detection hashing function to (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Address Brian's comments Created 9 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « chrome/browser/safe_browsing/browser_feature_extractor.h ('k') | chrome/browser/safe_browsing/browser_feature_extractor_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/safe_browsing/browser_feature_extractor.cc

diff --git a/chrome/browser/safe_browsing/browser_feature_extractor.cc b/chrome/browser/safe_browsing/browser_feature_extractor.cc

index 9cf4770b28bf8fd6109a86bf07ca89be8170bebf..69415a54517a12616ebe1381d1ba30bfb2815deb 100644

--- a/chrome/browser/safe_browsing/browser_feature_extractor.cc

+++ b/chrome/browser/safe_browsing/browser_feature_extractor.cc

@@ -9,6 +9,7 @@

#include "base/stl_util.h"

#include "base/stringprintf.h"

+#include "base/string_util.h"

#include "base/task.h"

#include "base/time.h"

#include "chrome/common/safe_browsing/csd.pb.h"

@@ -27,7 +28,7 @@

namespace safe_browsing {

-const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5;

+const int BrowserFeatureExtractor::kHashPrefixLength = 5;

BrowseInfo::BrowseInfo() {}

@@ -453,9 +454,27 @@ void BrowserFeatureExtractor::ComputeURLHash(

&host, &path, &query);

DCHECK(!host.empty()) << request->url();

DCHECK(!path.empty()) << request->url();

- request->set_suffix_prefix_hash(

- crypto::SHA256HashString(host + path).substr(

- 0, kSuffixPrefixHashLength));

+ // Lowercase the URL. Note: canonicalization converts the URL to ASCII.

+ // Percent encoded characters will not be lowercased but this is consistent

+ // with what we're doing on the server side.

+ StringToLowerASCII(&host);

+ StringToLowerASCII(&path);

+ // Remove leading 'www.' from the host.

+ if (host.size() > 4 && host.substr(0, 4) == "www.") {

+ host.erase(0, 4);

+ }

+ // Remove everything after the last '/' to broaden the pattern.

+ if (path.size() > 1 && *(path.rbegin()) != '/') {

+ // The path never ends in a '?' (as in "foo.com/test?") because the CGI params were stripped.

+ // Remove everything that comes after the last '/'.

+ size_t last_path = path.rfind("/");

+ path.erase(last_path + 1);

+ }

+ request->set_hash_prefix(crypto::SHA256HashString(host + path).substr(

+ 0, kHashPrefixLength));

}

}; // namespace safe_browsing