| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" | 5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" |
| 6 | 6 |
| 7 #include <map> | 7 #include <map> |
| 8 #include <utility> | 8 #include <utility> |
| 9 | 9 |
| 10 #include "base/stl_util.h" | 10 #include "base/stl_util.h" |
| 11 #include "base/stringprintf.h" | 11 #include "base/stringprintf.h" |
| 12 #include "base/string_util.h" |
| 12 #include "base/task.h" | 13 #include "base/task.h" |
| 13 #include "base/time.h" | 14 #include "base/time.h" |
| 14 #include "chrome/common/safe_browsing/csd.pb.h" | 15 #include "chrome/common/safe_browsing/csd.pb.h" |
| 15 #include "chrome/browser/history/history.h" | 16 #include "chrome/browser/history/history.h" |
| 16 #include "chrome/browser/history/history_types.h" | 17 #include "chrome/browser/history/history_types.h" |
| 17 #include "chrome/browser/profiles/profile.h" | 18 #include "chrome/browser/profiles/profile.h" |
| 18 #include "chrome/browser/safe_browsing/browser_features.h" | 19 #include "chrome/browser/safe_browsing/browser_features.h" |
| 19 #include "chrome/browser/safe_browsing/client_side_detection_service.h" | 20 #include "chrome/browser/safe_browsing/client_side_detection_service.h" |
| 20 #include "chrome/browser/safe_browsing/safe_browsing_util.h" | 21 #include "chrome/browser/safe_browsing/safe_browsing_util.h" |
| 21 #include "content/common/page_transition_types.h" | 22 #include "content/common/page_transition_types.h" |
| 22 #include "content/browser/browser_thread.h" | 23 #include "content/browser/browser_thread.h" |
| 23 #include "content/browser/cancelable_request.h" | 24 #include "content/browser/cancelable_request.h" |
| 24 #include "content/browser/tab_contents/tab_contents.h" | 25 #include "content/browser/tab_contents/tab_contents.h" |
| 25 #include "crypto/sha2.h" | 26 #include "crypto/sha2.h" |
| 26 #include "googleurl/src/gurl.h" | 27 #include "googleurl/src/gurl.h" |
| 27 | 28 |
| 28 namespace safe_browsing { | 29 namespace safe_browsing { |
| 29 | 30 |
| 30 const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5; | 31 const int BrowserFeatureExtractor::kHashPrefixLength = 5; |
| 31 | 32 |
| 32 BrowseInfo::BrowseInfo() {} | 33 BrowseInfo::BrowseInfo() {} |
| 33 | 34 |
| 34 BrowseInfo::~BrowseInfo() {} | 35 BrowseInfo::~BrowseInfo() {} |
| 35 | 36 |
| 36 static void AddFeature(const std::string& feature_name, | 37 static void AddFeature(const std::string& feature_name, |
| 37 double feature_value, | 38 double feature_value, |
| 38 ClientPhishingRequest* request) { | 39 ClientPhishingRequest* request) { |
| 39 DCHECK(request); | 40 DCHECK(request); |
| 40 ClientPhishingRequest::Feature* feature = | 41 ClientPhishingRequest::Feature* feature = |
| (...skipping 405 matching lines...) | (...skipping 405 matching lines...) |
| 446 | 447 |
| 447 void BrowserFeatureExtractor::ComputeURLHash( | 448 void BrowserFeatureExtractor::ComputeURLHash( |
| 448 ClientPhishingRequest* request) { | 449 ClientPhishingRequest* request) { |
| 449 // Put the url into SafeBrowsing host suffix / path prefix format, with | 450 // Stores the first kHashPrefixLength characters of crypto::SHA256HashString(host + path) on |request| via set_hash_prefix(). First put the url into SafeBrowsing host suffix / path prefix format, with |
| 450 // query parameters stripped. | 451 // query parameters stripped (the query output of CanonicalizeUrl() is not used below). |
| 451 std::string host, path, query; | 452 std::string host, path, query; |
| 452 safe_browsing_util::CanonicalizeUrl(GURL(request->url()), | 453 safe_browsing_util::CanonicalizeUrl(GURL(request->url()), |
| 453 &host, &path, &query); | 454 &host, &path, &query); |
| 454 DCHECK(!host.empty()) << request->url(); | 455 DCHECK(!host.empty()) << request->url(); |
| 455 DCHECK(!path.empty()) << request->url(); | 456 DCHECK(!path.empty()) << request->url(); |
| 456 request->set_suffix_prefix_hash( | 457 |
| 457 crypto::SHA256HashString(host + path).substr( | 458 // Lowercase the URL. Note: canonicalization converts the URL to ASCII. |
| 458 0, kSuffixPrefixHashLength)); | 459 // Percent encoded characters will not be lowercased but this is consistent |
| 460 // with what we're doing on the server side. |
| 461 StringToLowerASCII(&host); |
| 462 StringToLowerASCII(&path); |
| 463 |
| 464 // Remove a leading 'www.' from the host. A host that is exactly 'www.' is left unchanged because of the size() > 4 guard. |
| 465 if (host.size() > 4 && host.substr(0, 4) == "www.") { |
| 466 host.erase(0, 4); |
| 467 } |
| 468 // Remove everything after the last '/' to broaden the pattern. Paths of length 1 and paths that already end in '/' are left as they are. |
| 469 if (path.size() > 1 && *(path.rbegin()) != '/') { |
| 470 // The pattern never ends in foo.com/test? because we stripped CGI params. |
| 471 // Remove everything that comes after the last '/'. NOTE(review): if rfind() found no '/', npos + 1 would wrap to 0 and erase(0) would clear the whole path; presumably a canonicalized path always starts with '/' -- confirm. |
| 472 size_t last_path = path.rfind("/"); |
| 473 path.erase(last_path + 1); |
| 474 } |
| 475 |
| 476 request->set_hash_prefix(crypto::SHA256HashString(host + path).substr( |
| 477 0, kHashPrefixLength)); |
| 459 } | 478 } |
| 460 | 479 |
| 461 }; // namespace safe_browsing | 480 }; // namespace safe_browsing |
| OLD | NEW |