Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(382)

Side by Side Diff: content/common/cross_site_document_classifier.cc

Issue 1174323002: [Patch 5 of 6] Split out the site_isolation_policy files (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@rename_policy_to_sniffer3
Patch Set: Pull Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
alexmos 2015/06/13 00:27:03 2015
ncarter (slow) 2015/06/16 22:43:22 Done.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/child/site_isolation_policy.h" 5 #include "content/common/cross_site_document_classifier.h"
6 6
7 #include "base/basictypes.h" 7 #include "base/basictypes.h"
8 #include "base/command_line.h" 8 #include "base/command_line.h"
9 #include "base/lazy_instance.h" 9 #include "base/lazy_instance.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "base/metrics/histogram.h" 11 #include "base/metrics/histogram.h"
12 #include "base/strings/string_util.h" 12 #include "base/strings/string_util.h"
13 #include "content/public/common/content_switches.h" 13 #include "content/public/common/content_switches.h"
14 #include "content/public/common/resource_response_info.h" 14 #include "content/public/common/resource_response_info.h"
15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
16 #include "net/http/http_response_headers.h" 16 #include "net/http/http_response_headers.h"
17 17
18 using base::StringPiece; 18 using base::StringPiece;
19 19
20 namespace content { 20 namespace content {
21 21
22 namespace { 22 namespace {
23 23
24 // The gathering of UMA stats for site isolation is deactivated by default, and
25 // only activated in renderer processes.
26 static bool g_stats_gathering_enabled = false;
27
28 // MIME types 24 // MIME types
29 const char kTextHtml[] = "text/html"; 25 const char kTextHtml[] = "text/html";
30 const char kTextXml[] = "text/xml"; 26 const char kTextXml[] = "text/xml";
31 const char xAppRssXml[] = "application/rss+xml"; 27 const char xAppRssXml[] = "application/rss+xml";
32 const char kAppXml[] = "application/xml"; 28 const char kAppXml[] = "application/xml";
33 const char kAppJson[] = "application/json"; 29 const char kAppJson[] = "application/json";
34 const char kTextJson[] = "text/json"; 30 const char kTextJson[] = "text/json";
35 const char kTextXjson[] = "text/x-json"; 31 const char kTextXjson[] = "text/x-json";
36 const char kTextPlain[] = "text/plain"; 32 const char kTextPlain[] = "text/plain";
37 33
38 // TODO(dsjang): this is only needed for collecting UMA stats. Will be deleted
39 // when this class is used for actual blocking.
40 bool IsRenderableStatusCode(int status_code) {
41 // Chrome only uses the content of a response with one of these status codes
42 // for CSS/JavaScript. For images, Chrome just ignores status code.
43 const int renderable_status_code[] = {
44 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307};
45 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {
46 if (renderable_status_code[i] == status_code)
47 return true;
48 }
49 return false;
50 }
51
52 bool MatchesSignature(StringPiece data, 34 bool MatchesSignature(StringPiece data,
53 const StringPiece signatures[], 35 const StringPiece signatures[],
54 size_t arr_size) { 36 size_t arr_size) {
55 size_t offset = data.find_first_not_of(" \t\r\n"); 37 size_t offset = data.find_first_not_of(" \t\r\n");
56 // There is no non-whitespace character in this document. 38 // There is no non-whitespace character in this document.
57 if (offset == base::StringPiece::npos) 39 if (offset == base::StringPiece::npos)
58 return false; 40 return false;
59 41
60 data.remove_prefix(offset); 42 data.remove_prefix(offset);
61 size_t length = data.length(); 43 size_t length = data.length();
62 44
63 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { 45 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {
64 const StringPiece& signature = signatures[sig_index]; 46 const StringPiece& signature = signatures[sig_index];
65 size_t signature_length = signature.length(); 47 size_t signature_length = signature.length();
66 if (length < signature_length) 48 if (length < signature_length)
67 continue; 49 continue;
68 50
69 if (LowerCaseEqualsASCII(data.begin(), data.begin() + signature_length, 51 if (LowerCaseEqualsASCII(data.begin(), data.begin() + signature_length,
70 signature.data())) 52 signature.data()))
71 return true; 53 return true;
72 } 54 }
73 return false; 55 return false;
74 } 56 }
75 57
76 void IncrementHistogramCount(const std::string& name) {
77 // The default value of min, max, bucket_count are copied from histogram.h.
78 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(
79 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);
80 histogram_pointer->Add(1);
81 }
82
83 void IncrementHistogramEnum(const std::string& name,
84 uint32 sample,
85 uint32 boundary_value) {
86 // The default value of min, max, bucket_count are copied from histogram.h.
87 base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet(
88 name, 1, boundary_value, boundary_value + 1,
89 base::HistogramBase::kUmaTargetedHistogramFlag);
90 histogram_pointer->Add(sample);
91 }
92
93 void HistogramCountBlockedResponse(
94 const std::string& bucket_prefix,
95 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
96 bool nosniff_block) {
97 std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked");
98 IncrementHistogramCount(bucket_prefix + block_label);
99
100 // The content is blocked if it is sniffed as HTML/JSON/XML. When
101 // the blocked response has an error status code, blocking is not
102 // disruptive for the following reasons: 1) the blocked content is
103 // not a binary object (such as an image) since it is sniffed as
104 // text; 2) blocking therefore breaks renderer behavior only if the
105 // content is either JavaScript or CSS. However, the renderer
106 // doesn't use the contents of JS/CSS with a non-renderable status
107 // code (e.g., 404); 3) the renderer is expected not to use the cross-site
108 // document content for purposes other than JS/CSS (e.g., XHR).
109 bool renderable_status_code =
110 IsRenderableStatusCode(resp_data->http_status_code);
111
112 if (renderable_status_code) {
113 IncrementHistogramEnum(
114 bucket_prefix + block_label + ".RenderableStatusCode",
115 resp_data->resource_type, RESOURCE_TYPE_LAST_TYPE);
116 } else {
117 IncrementHistogramCount(bucket_prefix + block_label +
118 ".NonRenderableStatusCode");
119 }
120 }
121
122 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix,
123 bool sniffed_as_js) {
124 IncrementHistogramCount(bucket_prefix + ".NotBlocked");
125 if (sniffed_as_js)
126 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS");
127 }
128
129 } // namespace 58 } // namespace
130 59
131 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {
132 }
133
134 void SiteIsolationStatsGatherer::SetEnabled(bool enabled) {
135 g_stats_gathering_enabled = enabled;
136 }
137
138 linked_ptr<SiteIsolationResponseMetaData>
139 SiteIsolationStatsGatherer::OnReceivedResponse(
140 const GURL& frame_origin,
141 const GURL& response_url,
142 ResourceType resource_type,
143 int origin_pid,
144 const ResourceResponseInfo& info) {
145 if (!g_stats_gathering_enabled)
146 return linked_ptr<SiteIsolationResponseMetaData>();
147
148 // If |origin_pid| is non-zero, it means that this response is for a plugin
149 // spawned from this renderer process. We exclude responses for plugins for
150 // now, but eventually, we're going to make plugin processes directly talk to
151 // the browser process so that we don't apply cross-site document blocking to
152 // them.
153 if (origin_pid)
154 return linked_ptr<SiteIsolationResponseMetaData>();
155
156 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
157
158 // See if this is for navigation. If it is, don't block it, under the
159 // assumption that we will put it in an appropriate process.
160 if (IsResourceTypeFrame(resource_type))
161 return linked_ptr<SiteIsolationResponseMetaData>();
162
163 if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url))
164 return linked_ptr<SiteIsolationResponseMetaData>();
165
166 if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url))
167 return linked_ptr<SiteIsolationResponseMetaData>();
168
169 CrossSiteDocumentMimeType canonical_mime_type =
170 CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type);
171
172 if (canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS)
173 return linked_ptr<SiteIsolationResponseMetaData>();
174
175 // Every CORS request should have the Access-Control-Allow-Origin header even
176 // if it is preceded by a pre-flight request. Therefore, if this is a CORS
177 // request, it has this header. response.httpHeaderField() internally uses
178 // case-insensitive matching for the header name.
179 std::string access_control_origin;
180
181 // We can use a case-insensitive header name for EnumerateHeader().
182 info.headers->EnumerateHeader(NULL, "access-control-allow-origin",
183 &access_control_origin);
184 if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
185 frame_origin, response_url, access_control_origin))
186 return linked_ptr<SiteIsolationResponseMetaData>();
187
188 // Real XSD data collection starts from here.
189 std::string no_sniff;
190 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);
191
192 linked_ptr<SiteIsolationResponseMetaData> resp_data(
193 new SiteIsolationResponseMetaData);
194 resp_data->frame_origin = frame_origin.spec();
195 resp_data->response_url = response_url;
196 resp_data->resource_type = resource_type;
197 resp_data->canonical_mime_type = canonical_mime_type;
198 resp_data->http_status_code = info.headers->response_code();
199 resp_data->no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff");
200
201 return resp_data;
202 }
203
204 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(
205 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
206 const char* raw_data,
207 int raw_length) {
208 if (!g_stats_gathering_enabled)
209 return false;
210
211 DCHECK(resp_data.get());
212
213 StringPiece data(raw_data, raw_length);
214
215 // Record the length of the first received chunk of data to see if it's enough
216 // for sniffing.
217 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);
218
219 // Record the number of cross-site document responses with a specific mime
220 // type (text/html, text/xml, etc).
221 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",
222 resp_data->canonical_mime_type,
223 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX);
224
225 // Store the result of cross-site document blocking analysis.
226 bool would_block = false;
227 bool sniffed_as_js = SniffForJS(data);
228
229 // Record the number of responses whose content is sniffed for what its mime
230 // type claims it to be. For example, we apply a HTML sniffer for a document
231 // tagged with text/html here. Whenever this check becomes true, we'll block
232 // the response.
233 if (resp_data->canonical_mime_type != CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN) {
234 std::string bucket_prefix;
235 bool sniffed_as_target_document = false;
236 if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_HTML) {
237 bucket_prefix = "SiteIsolation.XSD.HTML";
238 sniffed_as_target_document =
239 CrossSiteDocumentClassifier::SniffForHTML(data);
240 } else if (resp_data->canonical_mime_type ==
241 CROSS_SITE_DOCUMENT_MIME_TYPE_XML) {
242 bucket_prefix = "SiteIsolation.XSD.XML";
243 sniffed_as_target_document =
244 CrossSiteDocumentClassifier::SniffForXML(data);
245 } else if (resp_data->canonical_mime_type ==
246 CROSS_SITE_DOCUMENT_MIME_TYPE_JSON) {
247 bucket_prefix = "SiteIsolation.XSD.JSON";
248 sniffed_as_target_document =
249 CrossSiteDocumentClassifier::SniffForJSON(data);
250 } else {
251 NOTREACHED() << "Not a blockable mime type: "
252 << resp_data->canonical_mime_type;
253 }
254
255 if (sniffed_as_target_document) {
256 would_block = true;
257 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
258 } else {
259 if (resp_data->no_sniff) {
260 would_block = true;
261 HistogramCountBlockedResponse(bucket_prefix, resp_data, true);
262 } else {
263 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js);
264 }
265 }
266 } else {
267 // This block is for plain text documents. We apply our HTML, XML,
268 // and JSON sniffers to a text document in that order, and block it
269 // if any of them succeeds in sniffing.
270 std::string bucket_prefix;
271 if (CrossSiteDocumentClassifier::SniffForHTML(data))
272 bucket_prefix = "SiteIsolation.XSD.Plain.HTML";
273 else if (CrossSiteDocumentClassifier::SniffForXML(data))
274 bucket_prefix = "SiteIsolation.XSD.Plain.XML";
275 else if (CrossSiteDocumentClassifier::SniffForJSON(data))
276 bucket_prefix = "SiteIsolation.XSD.Plain.JSON";
277
278 if (bucket_prefix.size() > 0) {
279 would_block = true;
280 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
281 } else if (resp_data->no_sniff) {
282 would_block = true;
283 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);
284 } else {
285 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
286 sniffed_as_js);
287 }
288 }
289
290 return would_block;
291 }
292
293 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType( 60 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType(
294 const std::string& mime_type) { 61 const std::string& mime_type) {
295 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) { 62 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {
296 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML; 63 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML;
297 } 64 }
298 65
299 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) { 66 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {
300 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN; 67 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN;
301 } 68 }
302 69
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
476 break; 243 break;
477 case kColonState: 244 case kColonState:
478 case kTerminalState: 245 case kTerminalState:
479 NOTREACHED(); 246 NOTREACHED();
480 break; 247 break;
481 } 248 }
482 } 249 }
483 return state == kColonState; 250 return state == kColonState;
484 } 251 }
485 252
486 bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) {
487 // The purpose of this function is to try to see if there's any possibility
488 // that this data can be JavaScript (superset of JS). Search for "var " for JS
489 // detection. This is a real hack and should only be used for stats gathering.
490 return data.find("var ") != base::StringPiece::npos;
491 }
492
493 } // namespace content 253 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698