Chromium Code Reviews| Index: chrome/browser/safe_browsing/off_domain_inclusion_detector.cc |
| diff --git a/chrome/browser/safe_browsing/off_domain_inclusion_detector.cc b/chrome/browser/safe_browsing/off_domain_inclusion_detector.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..442179a4ef503aa1c97c49665d642b5bbdc3c9cd |
| --- /dev/null |
| +++ b/chrome/browser/safe_browsing/off_domain_inclusion_detector.cc |
| @@ -0,0 +1,108 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "chrome/browser/safe_browsing/off_domain_inclusion_detector.h" |
| + |
| +#include <string> |
| + |
| +#include "base/metrics/histogram.h" |
| +#include "content/public/browser/resource_request_info.h" |
| +#include "net/base/registry_controlled_domains/registry_controlled_domains.h" |
| +#include "net/url_request/url_request.h" |
| +#include "url/gurl.h" |
| + |
| +namespace safe_browsing { |
|
mattm
2014/09/09 21:20:16
Can you add tests? I found one example of testing
|
| + |
| +// Inspects |request| (a load of type |resource_type|) and records UMA |
| +// histograms describing whether it is an off-domain inclusion, i.e. whether |
| +// its URL belongs to a different domain than its referrer (which is used |
| +// here as the main frame URL). Only http(s) requests of the resource types |
| +// selected by the switch below are analyzed; all others return early without |
| +// recording anything. |
| +void OffDomainInclusionDetector::OnResourceRequest( |
| +    const net::URLRequest* request, |
| +    content::ResourceType resource_type) { |
| +  // Only look at actual net requests (e.g., not chrome-extensions://id/foo.js). |
| +  if (!request->url().SchemeIsHTTPOrHTTPS()) |
| +    return; |
| + |
| +  // NOTE(review): |request_info| is dereferenced below without a null check; |
| +  // confirm ForRequest() cannot return NULL for requests routed here. |
| +  const content::ResourceRequestInfo* request_info = |
| +      content::ResourceRequestInfo::ForRequest(request); |
| + |
| +  // Make an explicit decision about every resource type (causing compiler |
| +  // errors should a new resource type be added). |
| +  switch (resource_type) { |
| +    case content::RESOURCE_TYPE_MAIN_FRAME: |
| +      // Analyze inclusions in the main frame, not the main frame itself. |
| +      return; |
| +    case content::RESOURCE_TYPE_SUB_FRAME: |
| +      DCHECK(!request_info->IsMainFrame()); |
| +      // Only analyze top-level frames within the main frame. |
| +      if (!request_info->ParentIsMainFrame()) |
| +        return; |
| +      break; |
| +    case content::RESOURCE_TYPE_STYLESHEET: |
| +    case content::RESOURCE_TYPE_SCRIPT: |
| +    case content::RESOURCE_TYPE_IMAGE: |
| +    case content::RESOURCE_TYPE_FONT_RESOURCE: |
| +    case content::RESOURCE_TYPE_SUB_RESOURCE: |
| +    case content::RESOURCE_TYPE_OBJECT: |
| +    case content::RESOURCE_TYPE_MEDIA: |
| +    case content::RESOURCE_TYPE_XHR: |
| +      // Types above are to be analyzed for off-domain inclusion if they are |
| +      // loaded as part of the main frame. |
| +      if (!request_info->IsMainFrame()) |
| +        return; |
| +      break; |
| +    case content::RESOURCE_TYPE_WORKER: |
| +    case content::RESOURCE_TYPE_SHARED_WORKER: |
| +    case content::RESOURCE_TYPE_PREFETCH: |
| +    case content::RESOURCE_TYPE_FAVICON: |
| +    case content::RESOURCE_TYPE_PING: |
| +    case content::RESOURCE_TYPE_SERVICE_WORKER: |
| +      // Types above are not to be analyzed for off-domain inclusion. |
| +      return; |
| +    case content::RESOURCE_TYPE_LAST_TYPE: |
| +      NOTREACHED(); |
| +      return; |
| +  } |
| + |
| +  // Record the type of request analyzed to be able to do ratio analysis w.r.t |
| +  // other histograms below. |
| +  UMA_HISTOGRAM_ENUMERATION("SBOffDomainInclusion.RequestAnalyzed", |
| +                            resource_type, |
| +                            content::RESOURCE_TYPE_LAST_TYPE); |
| + |
| +  const GURL main_frame_url(request->referrer()); |
| +  if (!main_frame_url.is_valid()) { |
| +    if (main_frame_url.is_empty()) { |
| +      // This can happen in a few scenarios where the referer is dropped (e.g., |
| +      // HTTPS => HTTP requests). Consider adding the original referer to |
| +      // ResourceRequestInfo if that's an issue. |
|
mattm
2014/09/09 21:20:16
I also have a todo about looking into keeping thes
gab
2014/11/12 20:49:34
Acknowledged.
|
| +      UMA_HISTOGRAM_ENUMERATION("SBOffDomainInclusion.EmptyMainFrameURL", |
| +                                resource_type, |
| +                                content::RESOURCE_TYPE_LAST_TYPE); |
| +    } else { |
| +      // There is no reason for the main frame to start loading resources if its |
| +      // own URL is invalid but measure this in the wild to make sure. |
| +      UMA_HISTOGRAM_ENUMERATION("SBOffDomainInclusion.InvalidMainFrameURL", |
| +                                resource_type, |
| +                                content::RESOURCE_TYPE_LAST_TYPE); |
| +    } |
| +  } else { |
| +    // Compare the registrable domains (registry + one label) of the request |
| +    // and of the main frame. Merely keeping the last two labels of the host |
| +    // is wrong for multi-label registries ("foo.co.uk" would be cut down to |
| +    // "co.uk", making every co.uk site look same-domain) and for IP-address |
| +    // hosts ("10.0.1.1" would be cut down to "1.1"). |
|
mattm
2014/09/09 21:20:16
Should use registry_controlled_domains::GetDomainA
gab
2014/11/12 20:49:34
Oh awesome!!! I was looking for exactly that, I ha
|
| +    // NOTE(review): private registries (e.g. hosting providers that hand out |
| +    // subdomains to unrelated parties) are treated as registries here so that |
| +    // such subdomains count as distinct domains; switch to |
| +    // EXCLUDE_PRIVATE_REGISTRIES if that is not the desired policy. |
| +    const bool is_same_domain = |
| +        net::registry_controlled_domains::SameDomainOrHost( |
| +            request->url(), |
| +            main_frame_url, |
| +            net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); |
| + |
| +    if (!is_same_domain) { |
| +      UMA_HISTOGRAM_ENUMERATION("SBOffDomainInclusion.Detected", |
| +                                resource_type, |
| +                                content::RESOURCE_TYPE_LAST_TYPE); |
| +    } |
| +  } |
| +} |
| + |
| +} // namespace safe_browsing |