Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(39)

Side by Side Diff: content/child/site_isolation_policy.cc

Issue 1181493002: [Patch 3 of 6] Split out content/child's SiteIsolationPolicy into two new classes. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@rename_policy_to_sniffer2
Patch Set: Yet more comment fixes. Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/child/site_isolation_policy.h" 5 #include "content/child/site_isolation_policy.h"
6 6
7 #include "base/basictypes.h" 7 #include "base/basictypes.h"
8 #include "base/command_line.h" 8 #include "base/command_line.h"
9 #include "base/lazy_instance.h" 9 #include "base/lazy_instance.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "base/metrics/histogram.h" 11 #include "base/metrics/histogram.h"
12 #include "base/strings/string_util.h" 12 #include "base/strings/string_util.h"
13 #include "content/public/common/content_switches.h" 13 #include "content/public/common/content_switches.h"
14 #include "content/public/common/resource_response_info.h" 14 #include "content/public/common/resource_response_info.h"
15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
16 #include "net/http/http_response_headers.h" 16 #include "net/http/http_response_headers.h"
17 17
18 using base::StringPiece; 18 using base::StringPiece;
19 19
20 namespace content { 20 namespace content {
21 21
22 namespace { 22 namespace {
23 23
24 // The cross-site document blocking/UMA data collection is deactivated by 24 // The gathering of UMA stats for site isolation is deactivated by default, and
25 // default, and only activated in renderer processes. 25 // only activated in renderer processes.
26 static bool g_policy_enabled = false; 26 static bool g_stats_gathering_enabled = false;
27 27
28 // MIME types 28 // MIME types
29 const char kTextHtml[] = "text/html"; 29 const char kTextHtml[] = "text/html";
30 const char kTextXml[] = "text/xml"; 30 const char kTextXml[] = "text/xml";
31 const char xAppRssXml[] = "application/rss+xml"; 31 const char xAppRssXml[] = "application/rss+xml";
32 const char kAppXml[] = "application/xml"; 32 const char kAppXml[] = "application/xml";
33 const char kAppJson[] = "application/json"; 33 const char kAppJson[] = "application/json";
34 const char kTextJson[] = "text/json"; 34 const char kTextJson[] = "text/json";
35 const char kTextXjson[] = "text/x-json"; 35 const char kTextXjson[] = "text/x-json";
36 const char kTextPlain[] = "text/plain"; 36 const char kTextPlain[] = "text/plain";
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
124 IncrementHistogramCount(bucket_prefix + ".NotBlocked"); 124 IncrementHistogramCount(bucket_prefix + ".NotBlocked");
125 if (sniffed_as_js) 125 if (sniffed_as_js)
126 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS"); 126 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS");
127 } 127 }
128 128
129 } // namespace 129 } // namespace
130 130
131 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() { 131 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {
132 } 132 }
133 133
134 void SiteIsolationPolicy::SetPolicyEnabled(bool enabled) { 134 void SiteIsolationStatsGatherer::SetEnabled(bool enabled) {
135 g_policy_enabled = enabled; 135 g_stats_gathering_enabled = enabled;
136 } 136 }
137 137
138 linked_ptr<SiteIsolationResponseMetaData> 138 linked_ptr<SiteIsolationResponseMetaData>
139 SiteIsolationPolicy::OnReceivedResponse(const GURL& frame_origin, 139 SiteIsolationStatsGatherer::OnReceivedResponse(
140 const GURL& response_url, 140 const GURL& frame_origin,
141 ResourceType resource_type, 141 const GURL& response_url,
142 int origin_pid, 142 ResourceType resource_type,
143 const ResourceResponseInfo& info) { 143 int origin_pid,
144 if (!g_policy_enabled) 144 const ResourceResponseInfo& info) {
145 if (!g_stats_gathering_enabled)
145 return linked_ptr<SiteIsolationResponseMetaData>(); 146 return linked_ptr<SiteIsolationResponseMetaData>();
146 147
147 // If |origin_pid| is non-zero, it means that this response is for a plugin 148 // If |origin_pid| is non-zero, it means that this response is for a plugin
148 // spawned from this renderer process. We exclude responses for plugins for 149 // spawned from this renderer process. We exclude responses for plugins for
149 // now, but eventually, we're going to make plugin processes directly talk to 150 // now, but eventually, we're going to make plugin processes directly talk to
150 // the browser process so that we don't apply cross-site document blocking to 151 // the browser process so that we don't apply cross-site document blocking to
151 // them. 152 // them.
152 if (origin_pid) 153 if (origin_pid)
153 return linked_ptr<SiteIsolationResponseMetaData>(); 154 return linked_ptr<SiteIsolationResponseMetaData>();
154 155
155 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1); 156 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
156 157
157 // See if this is for navigation. If it is, don't block it, under the 158 // See if this is for navigation. If it is, don't block it, under the
158 // assumption that we will put it in an appropriate process. 159 // assumption that we will put it in an appropriate process.
159 if (IsResourceTypeFrame(resource_type)) 160 if (IsResourceTypeFrame(resource_type))
160 return linked_ptr<SiteIsolationResponseMetaData>(); 161 return linked_ptr<SiteIsolationResponseMetaData>();
161 162
162 if (!IsBlockableScheme(response_url)) 163 if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url))
163 return linked_ptr<SiteIsolationResponseMetaData>(); 164 return linked_ptr<SiteIsolationResponseMetaData>();
164 165
165 if (IsSameSite(frame_origin, response_url)) 166 if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url))
166 return linked_ptr<SiteIsolationResponseMetaData>(); 167 return linked_ptr<SiteIsolationResponseMetaData>();
167 168
168 SiteIsolationResponseMetaData::CanonicalMimeType canonical_mime_type = 169 SiteIsolationResponseMetaData::CanonicalMimeType canonical_mime_type =
169 GetCanonicalMimeType(info.mime_type); 170 CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type);
170 171
171 if (canonical_mime_type == SiteIsolationResponseMetaData::Others) 172 if (canonical_mime_type == SiteIsolationResponseMetaData::Others)
172 return linked_ptr<SiteIsolationResponseMetaData>(); 173 return linked_ptr<SiteIsolationResponseMetaData>();
173 174
174 // Every CORS request should have the Access-Control-Allow-Origin header even 175 // Every CORS request should have the Access-Control-Allow-Origin header even
175 // if it is preceded by a pre-flight request. Therefore, if this is a CORS 176 // if it is preceded by a pre-flight request. Therefore, if this is a CORS
176 // request, it has this header. response.httpHeaderField() internally uses 177 // request, it has this header. response.httpHeaderField() internally uses
177 // case-insensitive matching for the header name. 178 // case-insensitive matching for the header name.
178 std::string access_control_origin; 179 std::string access_control_origin;
179 180
180 // We can use a case-insensitive header name for EnumerateHeader(). 181 // We can use a case-insensitive header name for EnumerateHeader().
181 info.headers->EnumerateHeader(NULL, "access-control-allow-origin", 182 info.headers->EnumerateHeader(NULL, "access-control-allow-origin",
182 &access_control_origin); 183 &access_control_origin);
183 if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) 184 if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
185 frame_origin, response_url, access_control_origin))
184 return linked_ptr<SiteIsolationResponseMetaData>(); 186 return linked_ptr<SiteIsolationResponseMetaData>();
185 187
186 // Real XSD data collection starts from here. 188 // Real XSD data collection starts from here.
187 std::string no_sniff; 189 std::string no_sniff;
188 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff); 190 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);
189 191
190 linked_ptr<SiteIsolationResponseMetaData> resp_data( 192 linked_ptr<SiteIsolationResponseMetaData> resp_data(
191 new SiteIsolationResponseMetaData); 193 new SiteIsolationResponseMetaData);
192 resp_data->frame_origin = frame_origin.spec(); 194 resp_data->frame_origin = frame_origin.spec();
193 resp_data->response_url = response_url; 195 resp_data->response_url = response_url;
194 resp_data->resource_type = resource_type; 196 resp_data->resource_type = resource_type;
195 resp_data->canonical_mime_type = canonical_mime_type; 197 resp_data->canonical_mime_type = canonical_mime_type;
196 resp_data->http_status_code = info.headers->response_code(); 198 resp_data->http_status_code = info.headers->response_code();
197 resp_data->no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff"); 199 resp_data->no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff");
198 200
199 return resp_data; 201 return resp_data;
200 } 202 }
201 203
202 bool SiteIsolationPolicy::OnReceivedFirstChunk( 204 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(
203 const linked_ptr<SiteIsolationResponseMetaData>& resp_data, 205 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,
204 const char* raw_data, 206 const char* raw_data,
205 int raw_length) { 207 int raw_length) {
206 if (!g_policy_enabled) 208 if (!g_stats_gathering_enabled)
207 return false; 209 return false;
208 210
209 DCHECK(resp_data.get()); 211 DCHECK(resp_data.get());
210 212
211 StringPiece data(raw_data, raw_length); 213 StringPiece data(raw_data, raw_length);
212 214
213 // Record the length of the first received chunk of data to see if it's enough 215 // Record the length of the first received chunk of data to see if it's enough
214 // for sniffing. 216 // for sniffing.
215 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length); 217 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);
216 218
217 // Record the number of cross-site document responses with a specific mime 219 // Record the number of cross-site document responses with a specific mime
218 // type (text/html, text/xml, etc). 220 // type (text/html, text/xml, etc).
219 UMA_HISTOGRAM_ENUMERATION( 221 UMA_HISTOGRAM_ENUMERATION(
220 "SiteIsolation.XSD.MimeType", resp_data->canonical_mime_type, 222 "SiteIsolation.XSD.MimeType", resp_data->canonical_mime_type,
221 SiteIsolationResponseMetaData::MaxCanonicalMimeType); 223 SiteIsolationResponseMetaData::MaxCanonicalMimeType);
222 224
223 // Store the result of cross-site document blocking analysis. 225 // Store the result of cross-site document blocking analysis.
224 bool would_block = false; 226 bool would_block = false;
225 bool sniffed_as_js = SniffForJS(data); 227 bool sniffed_as_js = SniffForJS(data);
226 228
227 // Record the number of responses whose content is sniffed for what its mime 229 // Record the number of responses whose content is sniffed for what its mime
228 // type claims it to be. For example, we apply a HTML sniffer for a document 230 // type claims it to be. For example, we apply a HTML sniffer for a document
229 // tagged with text/html here. Whenever this check becomes true, we'll block 231 // tagged with text/html here. Whenever this check becomes true, we'll block
230 // the response. 232 // the response.
231 if (resp_data->canonical_mime_type != SiteIsolationResponseMetaData::Plain) { 233 if (resp_data->canonical_mime_type != SiteIsolationResponseMetaData::Plain) {
232 std::string bucket_prefix; 234 std::string bucket_prefix;
233 bool sniffed_as_target_document = false; 235 bool sniffed_as_target_document = false;
234 if (resp_data->canonical_mime_type == SiteIsolationResponseMetaData::HTML) { 236 if (resp_data->canonical_mime_type == SiteIsolationResponseMetaData::HTML) {
235 bucket_prefix = "SiteIsolation.XSD.HTML"; 237 bucket_prefix = "SiteIsolation.XSD.HTML";
236 sniffed_as_target_document = SniffForHTML(data); 238 sniffed_as_target_document =
239 CrossSiteDocumentClassifier::SniffForHTML(data);
237 } else if (resp_data->canonical_mime_type == 240 } else if (resp_data->canonical_mime_type ==
238 SiteIsolationResponseMetaData::XML) { 241 SiteIsolationResponseMetaData::XML) {
239 bucket_prefix = "SiteIsolation.XSD.XML"; 242 bucket_prefix = "SiteIsolation.XSD.XML";
240 sniffed_as_target_document = SniffForXML(data); 243 sniffed_as_target_document =
244 CrossSiteDocumentClassifier::SniffForXML(data);
241 } else if (resp_data->canonical_mime_type == 245 } else if (resp_data->canonical_mime_type ==
242 SiteIsolationResponseMetaData::JSON) { 246 SiteIsolationResponseMetaData::JSON) {
243 bucket_prefix = "SiteIsolation.XSD.JSON"; 247 bucket_prefix = "SiteIsolation.XSD.JSON";
244 sniffed_as_target_document = SniffForJSON(data); 248 sniffed_as_target_document =
249 CrossSiteDocumentClassifier::SniffForJSON(data);
245 } else { 250 } else {
246 NOTREACHED() << "Not a blockable mime type: " 251 NOTREACHED() << "Not a blockable mime type: "
247 << resp_data->canonical_mime_type; 252 << resp_data->canonical_mime_type;
248 } 253 }
249 254
250 if (sniffed_as_target_document) { 255 if (sniffed_as_target_document) {
251 would_block = true; 256 would_block = true;
252 HistogramCountBlockedResponse(bucket_prefix, resp_data, false); 257 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
253 } else { 258 } else {
254 if (resp_data->no_sniff) { 259 if (resp_data->no_sniff) {
255 would_block = true; 260 would_block = true;
256 HistogramCountBlockedResponse(bucket_prefix, resp_data, true); 261 HistogramCountBlockedResponse(bucket_prefix, resp_data, true);
257 } else { 262 } else {
258 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js); 263 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js);
259 } 264 }
260 } 265 }
261 } else { 266 } else {
262 // This block is for plain text documents. We apply our HTML, XML, 267 // This block is for plain text documents. We apply our HTML, XML,
263 // and JSON sniffers to a text document in that order, and block it 268 // and JSON sniffers to a text document in that order, and block it
264 // if any of them succeeds in sniffing. 269 // if any of them succeeds in sniffing.
265 std::string bucket_prefix; 270 std::string bucket_prefix;
266 if (SniffForHTML(data)) 271 if (CrossSiteDocumentClassifier::SniffForHTML(data))
267 bucket_prefix = "SiteIsolation.XSD.Plain.HTML"; 272 bucket_prefix = "SiteIsolation.XSD.Plain.HTML";
268 else if (SniffForXML(data)) 273 else if (CrossSiteDocumentClassifier::SniffForXML(data))
269 bucket_prefix = "SiteIsolation.XSD.Plain.XML"; 274 bucket_prefix = "SiteIsolation.XSD.Plain.XML";
270 else if (SniffForJSON(data)) 275 else if (CrossSiteDocumentClassifier::SniffForJSON(data))
271 bucket_prefix = "SiteIsolation.XSD.Plain.JSON"; 276 bucket_prefix = "SiteIsolation.XSD.Plain.JSON";
272 277
273 if (bucket_prefix.size() > 0) { 278 if (bucket_prefix.size() > 0) {
274 would_block = true; 279 would_block = true;
275 HistogramCountBlockedResponse(bucket_prefix, resp_data, false); 280 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
276 } else if (resp_data->no_sniff) { 281 } else if (resp_data->no_sniff) {
277 would_block = true; 282 would_block = true;
278 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true); 283 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);
279 } else { 284 } else {
280 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain", 285 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
281 sniffed_as_js); 286 sniffed_as_js);
282 } 287 }
283 } 288 }
284 289
285 return would_block; 290 return would_block;
286 } 291 }
287 292
288 SiteIsolationResponseMetaData::CanonicalMimeType 293 SiteIsolationResponseMetaData::CanonicalMimeType
289 SiteIsolationPolicy::GetCanonicalMimeType(const std::string& mime_type) { 294 CrossSiteDocumentClassifier::GetCanonicalMimeType(
295 const std::string& mime_type) {
290 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) { 296 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {
291 return SiteIsolationResponseMetaData::HTML; 297 return SiteIsolationResponseMetaData::HTML;
292 } 298 }
293 299
294 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) { 300 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {
295 return SiteIsolationResponseMetaData::Plain; 301 return SiteIsolationResponseMetaData::Plain;
296 } 302 }
297 303
298 if (LowerCaseEqualsASCII(mime_type, kAppJson) || 304 if (LowerCaseEqualsASCII(mime_type, kAppJson) ||
299 LowerCaseEqualsASCII(mime_type, kTextJson) || 305 LowerCaseEqualsASCII(mime_type, kTextJson) ||
300 LowerCaseEqualsASCII(mime_type, kTextXjson)) { 306 LowerCaseEqualsASCII(mime_type, kTextXjson)) {
301 return SiteIsolationResponseMetaData::JSON; 307 return SiteIsolationResponseMetaData::JSON;
302 } 308 }
303 309
304 if (LowerCaseEqualsASCII(mime_type, kTextXml) || 310 if (LowerCaseEqualsASCII(mime_type, kTextXml) ||
305 LowerCaseEqualsASCII(mime_type, xAppRssXml) || 311 LowerCaseEqualsASCII(mime_type, xAppRssXml) ||
306 LowerCaseEqualsASCII(mime_type, kAppXml)) { 312 LowerCaseEqualsASCII(mime_type, kAppXml)) {
307 return SiteIsolationResponseMetaData::XML; 313 return SiteIsolationResponseMetaData::XML;
308 } 314 }
309 315
310 return SiteIsolationResponseMetaData::Others; 316 return SiteIsolationResponseMetaData::Others;
311 } 317 }
312 318
313 bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) { 319 bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) {
314 // We exclude ftp:// from here. FTP doesn't provide a Content-Type 320 // We exclude ftp:// from here. FTP doesn't provide a Content-Type
315 // header which our policy depends on, so we cannot protect any 321 // header which our policy depends on, so we cannot protect any
316 // document from FTP servers. 322 // document from FTP servers.
317 return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme); 323 return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme);
318 } 324 }
319 325
320 bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin, 326 bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin,
321 const GURL& response_url) { 327 const GURL& response_url) {
322 if (!frame_origin.is_valid() || !response_url.is_valid()) 328 if (!frame_origin.is_valid() || !response_url.is_valid())
323 return false; 329 return false;
324 330
325 if (frame_origin.scheme() != response_url.scheme()) 331 if (frame_origin.scheme() != response_url.scheme())
326 return false; 332 return false;
327 333
328 // SameDomainOrHost() extracts the effective domains (public suffix plus one) 334 // SameDomainOrHost() extracts the effective domains (public suffix plus one)
329 // from the two URLs and compares them. 335 // from the two URLs and compares them.
330 return net::registry_controlled_domains::SameDomainOrHost( 336 return net::registry_controlled_domains::SameDomainOrHost(
331 frame_origin, response_url, 337 frame_origin, response_url,
332 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); 338 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
333 } 339 }
334 340
335 // We don't use Webkit's existing CORS policy implementation since 341 // We don't use Webkit's existing CORS policy implementation since
336 // their policy works in terms of origins, not sites. For example, 342 // their policy works in terms of origins, not sites. For example,
337 // a frame at sub.a.com is not allowed by their policy to access a 343 // a frame at sub.a.com is not allowed by their policy to access a
338 // document from sub1.a.com, but under Site Isolation it's allowed. 344 // document from sub1.a.com, but under Site Isolation it's allowed.
339 bool SiteIsolationPolicy::IsValidCorsHeaderSet( 345 bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet(
340 const GURL& frame_origin, 346 const GURL& frame_origin,
341 const GURL& website_origin, 347 const GURL& website_origin,
342 const std::string& access_control_origin) { 348 const std::string& access_control_origin) {
343 // Many websites are sending back "\"*\"" instead of "*". This is 349 // Many websites are sending back "\"*\"" instead of "*". This is
344 // non-standard practice, and not supported by Chrome. Refer to 350 // non-standard practice, and not supported by Chrome. Refer to
345 // CrossOriginAccessControl::passesAccessControlCheck(). 351 // CrossOriginAccessControl::passesAccessControlCheck().
346 352
347 // TODO(dsjang): * is not allowed for the response from a request 353 // TODO(dsjang): * is not allowed for the response from a request
348 // with cookies. This allows for more than what the renderer will 354 // with cookies. This allows for more than what the renderer will
349 // eventually be able to receive, so we won't see illegal cross-site 355 // eventually be able to receive, so we won't see illegal cross-site
350 // documents allowed by this. We have to find a way to see if this 356 // documents allowed by this. We have to find a way to see if this
351 // response is from a cookie-tagged request or not in the future. 357 // response is from a cookie-tagged request or not in the future.
352 if (access_control_origin == "*") 358 if (access_control_origin == "*")
353 return true; 359 return true;
354 360
355 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for 361 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for
356 // "*", but many websites are using just a domain for access_control_origin, 362 // "*", but many websites are using just a domain for access_control_origin,
357 // and this is blocked by Webkit's CORS logic here : 363 // and this is blocked by Webkit's CORS logic here :
358 // CrossOriginAccessControl::passesAccessControlCheck(). GURL's 364 // CrossOriginAccessControl::passesAccessControlCheck(). GURL's
359 // is_valid() is false when it is created from a URL containing * in the 365 // is_valid() is false when it is created from a URL containing * in the
360 // domain part. 366 // domain part.
361 367
362 GURL cors_origin(access_control_origin); 368 GURL cors_origin(access_control_origin);
363 return IsSameSite(frame_origin, cors_origin); 369 return IsSameSite(frame_origin, cors_origin);
364 } 370 }
365 371
366 // This function is a slight modification of |net::SniffForHTML|. 372 // This function is a slight modification of |net::SniffForHTML|.
367 bool SiteIsolationPolicy::SniffForHTML(StringPiece data) { 373 bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) {
368 // The content sniffers used by Chrome and Firefox use "<!--" 374 // The content sniffers used by Chrome and Firefox use "<!--"
369 // as one of the HTML signatures, but it also appears in valid 375 // as one of the HTML signatures, but it also appears in valid
370 // JavaScript, considered as well-formed JS by the browser. Since 376 // JavaScript, considered as well-formed JS by the browser. Since
371 // we do not want to block any JS, we exclude it from our HTML 377 // we do not want to block any JS, we exclude it from our HTML
372 // signatures. This can weaken our document block policy, but we can 378 // signatures. This can weaken our document block policy, but we can
373 // break fewer websites. 379 // break fewer websites.
374 // TODO(dsjang): parameterize |net::SniffForHTML| with an option 380 // TODO(dsjang): parameterize |net::SniffForHTML| with an option
375 // that decides whether to include <!-- or not, so that we can 381 // that decides whether to include <!-- or not, so that we can
376 // remove this function. 382 // remove this function.
377 // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser 383 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser
378 // process, we should do single-thread checking here for the static 384 // process, we should do single-thread checking here for the static
379 // initializer. 385 // initializer.
380 static const StringPiece kHtmlSignatures[] = { 386 static const StringPiece kHtmlSignatures[] = {
381 StringPiece("<!DOCTYPE html"), // HTML5 spec 387 StringPiece("<!DOCTYPE html"), // HTML5 spec
382 StringPiece("<script"), // HTML5 spec, Mozilla 388 StringPiece("<script"), // HTML5 spec, Mozilla
383 StringPiece("<html"), // HTML5 spec, Mozilla 389 StringPiece("<html"), // HTML5 spec, Mozilla
384 StringPiece("<head"), // HTML5 spec, Mozilla 390 StringPiece("<head"), // HTML5 spec, Mozilla
385 StringPiece("<iframe"), // Mozilla 391 StringPiece("<iframe"), // Mozilla
386 StringPiece("<h1"), // Mozilla 392 StringPiece("<h1"), // Mozilla
387 StringPiece("<div"), // Mozilla 393 StringPiece("<div"), // Mozilla
(...skipping 25 matching lines...) Expand all
413 if (offset == base::StringPiece::npos) 419 if (offset == base::StringPiece::npos)
414 break; 420 break;
415 421
416 // Proceed to the index next to the ending comment (-->). 422 // Proceed to the index next to the ending comment (-->).
417 data.remove_prefix(offset + strlen(kEndComment)); 423 data.remove_prefix(offset + strlen(kEndComment));
418 } 424 }
419 425
420 return false; 426 return false;
421 } 427 }
422 428
423 bool SiteIsolationPolicy::SniffForXML(base::StringPiece data) { 429 bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) {
424 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for 430 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for
425 // this signature. However, XML is case-sensitive. Don't we have to 431 // this signature. However, XML is case-sensitive. Don't we have to
426 // be more lenient only to block documents starting with the exact 432 // be more lenient only to block documents starting with the exact
427 // string <?xml rather than <?XML ? 433 // string <?xml rather than <?XML ?
428 // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser 434 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser
429 // process, we should do single-thread checking here for the static 435 // process, we should do single-thread checking here for the static
430 // initializer. 436 // initializer.
431 static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")}; 437 static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")};
432 return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures)); 438 return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures));
433 } 439 }
434 440
435 bool SiteIsolationPolicy::SniffForJSON(base::StringPiece data) { 441 bool CrossSiteDocumentClassifier::SniffForJSON(base::StringPiece data) {
436 // TODO(dsjang): We have to come up with a better way to sniff 442 // TODO(dsjang): We have to come up with a better way to sniff
437 // JSON. However, even RE cannot help us that much due to the fact 443 // JSON. However, even RE cannot help us that much due to the fact
438 // that we don't do full parsing. This DFA starts with state 0, and 444 // that we don't do full parsing. This DFA starts with state 0, and
439 // finds {, "/' and : in that order. We're avoiding adding a 445 // finds {, "/' and : in that order. We're avoiding adding a
440 // dependency on a regular expression library. 446 // dependency on a regular expression library.
441 enum { 447 enum {
442 kStartState, 448 kStartState,
443 kLeftBraceState, 449 kLeftBraceState,
444 kLeftQuoteState, 450 kLeftQuoteState,
445 kColonState, 451 kColonState,
(...skipping 25 matching lines...) Expand all
471 break; 477 break;
472 case kColonState: 478 case kColonState:
473 case kTerminalState: 479 case kTerminalState:
474 NOTREACHED(); 480 NOTREACHED();
475 break; 481 break;
476 } 482 }
477 } 483 }
478 return state == kColonState; 484 return state == kColonState;
479 } 485 }
480 486
481 bool SiteIsolationPolicy::SniffForJS(StringPiece data) { 487 bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) {
482 // TODO(dsjang): This is a real hack. The only purpose of this function is to 488 // The purpose of this function is to try to see if there's any possibility
483 // try to see if there's any possibility that this data can be JavaScript 489 // that this data can be JavaScript (superset of JS). Search for "var " for JS
484 // (superset of JS). This function will be removed once UMA stats are 490 // detection. This is a real hack and should only be used for stats gathering.
alexmos 2015/06/11 20:17:47 Wow, real hack indeed!
485 // gathered.
486
487 // Search for "var " for JS detection.
488 return data.find("var ") != base::StringPiece::npos; 491 return data.find("var ") != base::StringPiece::npos;
489 } 492 }
490 493
491 } // namespace content 494 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698