Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(119)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 7793012: Change the client-side phishing detection hashing function to (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address Brian's comments Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" 5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6 6
7 #include <map> 7 #include <map>
8 #include <utility> 8 #include <utility>
9 9
10 #include "base/stl_util.h" 10 #include "base/stl_util.h"
11 #include "base/stringprintf.h" 11 #include "base/stringprintf.h"
12 #include "base/string_util.h"
12 #include "base/task.h" 13 #include "base/task.h"
13 #include "base/time.h" 14 #include "base/time.h"
14 #include "chrome/common/safe_browsing/csd.pb.h" 15 #include "chrome/common/safe_browsing/csd.pb.h"
15 #include "chrome/browser/history/history.h" 16 #include "chrome/browser/history/history.h"
16 #include "chrome/browser/history/history_types.h" 17 #include "chrome/browser/history/history_types.h"
17 #include "chrome/browser/profiles/profile.h" 18 #include "chrome/browser/profiles/profile.h"
18 #include "chrome/browser/safe_browsing/browser_features.h" 19 #include "chrome/browser/safe_browsing/browser_features.h"
19 #include "chrome/browser/safe_browsing/client_side_detection_service.h" 20 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_util.h" 21 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
21 #include "content/common/page_transition_types.h" 22 #include "content/common/page_transition_types.h"
22 #include "content/browser/browser_thread.h" 23 #include "content/browser/browser_thread.h"
23 #include "content/browser/cancelable_request.h" 24 #include "content/browser/cancelable_request.h"
24 #include "content/browser/tab_contents/tab_contents.h" 25 #include "content/browser/tab_contents/tab_contents.h"
25 #include "crypto/sha2.h" 26 #include "crypto/sha2.h"
26 #include "googleurl/src/gurl.h" 27 #include "googleurl/src/gurl.h"
27 28
28 namespace safe_browsing { 29 namespace safe_browsing {
29 30
30 const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5; 31 const int BrowserFeatureExtractor::kHashPrefixLength = 5;
31 32
32 BrowseInfo::BrowseInfo() {} 33 BrowseInfo::BrowseInfo() {}
33 34
34 BrowseInfo::~BrowseInfo() {} 35 BrowseInfo::~BrowseInfo() {}
35 36
36 static void AddFeature(const std::string& feature_name, 37 static void AddFeature(const std::string& feature_name,
37 double feature_value, 38 double feature_value,
38 ClientPhishingRequest* request) { 39 ClientPhishingRequest* request) {
39 DCHECK(request); 40 DCHECK(request);
40 ClientPhishingRequest::Feature* feature = 41 ClientPhishingRequest::Feature* feature =
(...skipping 405 matching lines...) Expand 10 before | Expand all | Expand 10 after
446 447
447 void BrowserFeatureExtractor::ComputeURLHash( 448 void BrowserFeatureExtractor::ComputeURLHash(
448 ClientPhishingRequest* request) { 449 ClientPhishingRequest* request) {
449 // Put the url into SafeBrowsing host suffix / path prefix format, with 450 // Put the url into SafeBrowsing host suffix / path prefix format, with
450 // query parameters stripped. 451 // query parameters stripped.
451 std::string host, path, query; 452 std::string host, path, query;
452 safe_browsing_util::CanonicalizeUrl(GURL(request->url()), 453 safe_browsing_util::CanonicalizeUrl(GURL(request->url()),
453 &host, &path, &query); 454 &host, &path, &query);
454 DCHECK(!host.empty()) << request->url(); 455 DCHECK(!host.empty()) << request->url();
455 DCHECK(!path.empty()) << request->url(); 456 DCHECK(!path.empty()) << request->url();
456 request->set_suffix_prefix_hash( 457
457 crypto::SHA256HashString(host + path).substr( 458 // Lowercase the URL. Note: canonicalization converts the URL to ASCII.
458 0, kSuffixPrefixHashLength)); 459 // Percent encoded characters will not be lowercased but this is consistent
460 // with what we're doing on the server side.
461 StringToLowerASCII(&host);
462 StringToLowerASCII(&path);
463
464 // Remove leading 'www.' from the host.
465 if (host.size() > 4 && host.substr(0, 4) == "www.") {
466 host.erase(0, 4);
467 }
468 // Remove everything after the last '/' to broaden the pattern.
469 if (path.size() > 1 && *(path.rbegin()) != '/') {
470 // The pattern never ends in foo.com/test? because we stripped CGI params.
471 // Remove everything that comes after the last '/'.
472 size_t last_path = path.rfind("/");
473 path.erase(last_path + 1);
474 }
475
476 request->set_hash_prefix(crypto::SHA256HashString(host + path).substr(
477 0, kHashPrefixLength));
459 } 478 }
460 479
461 }; // namespace safe_browsing 480 }; // namespace safe_browsing
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698