content/child/site_isolation_policy.cc - Issue 1174323002: [Patch 5 of 6] Split out the site_isolation_policy files

Side by Side Diff: content/child/site_isolation_policy.cc

Issue 1174323002: [Patch 5 of 6] Split out the site_isolation_policy files (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@rename_policy_to_sniffer3

Patch Set: Re-upload. Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "content/child/site_isolation_policy.h"

6

7 #include "base/basictypes.h"

8 #include "base/command_line.h"

9 #include "base/lazy_instance.h"

10 #include "base/logging.h"

11 #include "base/metrics/histogram.h"

12 #include "base/strings/string_util.h"

13 #include "content/public/common/content_switches.h"

14 #include "content/public/common/resource_response_info.h"

15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"

16 #include "net/http/http_response_headers.h"

17

18 using base::StringPiece;

19

20 namespace content {

21

22 namespace {

23

// UMA stats gathering for site isolation is off by default and is only
// activated in renderer processes. The flag is flipped through
// SiteIsolationStatsGatherer::SetEnabled().
bool g_stats_gathering_enabled = false;

// MIME types recognized by CrossSiteDocumentClassifier::GetCanonicalMimeType(),
// grouped by the canonical category each one maps to.

// HTML.
const char kTextHtml[] = "text/html";

// Plain text.
const char kTextPlain[] = "text/plain";

// JSON.
const char kAppJson[] = "application/json";
const char kTextJson[] = "text/json";
const char kTextXjson[] = "text/x-json";

// XML.
// NOTE(review): |xAppRssXml| does not follow the k-prefix convention of its
// siblings; renaming it also requires updating GetCanonicalMimeType().
const char kTextXml[] = "text/xml";
const char xAppRssXml[] = "application/rss+xml";
const char kAppXml[] = "application/xml";

37

// TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted
// when this class is used for actual blocking.
//
// Returns true if |status_code| is one of the HTTP status codes for which
// Chrome uses the response content for CSS/JavaScript. For images, Chrome just
// ignores the status code.
bool IsRenderableStatusCode(int status_code) {
  switch (status_code) {
    case 200:
    case 201:
    case 202:
    case 203:
    case 206:
    case 300:
    case 301:
    case 302:
    case 303:
    case 305:
    case 306:
    case 307:
      return true;
    default:
      return false;
  }
}

51

52 bool MatchesSignature(StringPiece data,

53 const StringPiece signatures[],

54 size_t arr_size) {

55 size_t offset = data.find_first_not_of(" \t\r\n");

56 // There is no not-whitespace character in this document.

57 if (offset == base::StringPiece::npos)

58 return false;

59

60 data.remove_prefix(offset);

61 size_t length = data.length();

62

63 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {

64 const StringPiece& signature = signatures[sig_index];

65 size_t signature_length = signature.length();

66 if (length < signature_length)

67 continue;

68

69 if (base::LowerCaseEqualsASCII(data.begin(),

70 data.begin() + signature_length,

71 signature.data()))

72 return true;

73 }

74 return false;

75 }

76

77 void IncrementHistogramCount(const std::string& name) {

78 // The default value of min, max, bucket_count are copied from histogram.h.

79 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(

80 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);

81 histogram_pointer->Add(1);

82 }

83

84 void IncrementHistogramEnum(const std::string& name,

85 uint32 sample,

86 uint32 boundary_value) {

87 // The default value of min, max, bucket_count are copied from histogram.h.

88 base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet(

89 name, 1, boundary_value, boundary_value + 1,

90 base::HistogramBase::kUmaTargetedHistogramFlag);

91 histogram_pointer->Add(sample);

92 }

93

94 void HistogramCountBlockedResponse(

95 const std::string& bucket_prefix,

96 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,

97 bool nosniff_block) {

98 std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked");

99 IncrementHistogramCount(bucket_prefix + block_label);

100

101 // The content is blocked if it is sniffed as HTML/JSON/XML. When

102 // the blocked response is with an error status code, it is not

103 // disruptive for the following reasons : 1) the blocked content is

104 // not a binary object (such as an image) since it is sniffed as

105 // text; 2) then, this blocking only breaks the renderer behavior

106 // only if it is either JavaScript or CSS. However, the renderer

107 // doesn't use the contents of JS/CSS with unaffected status code

108 // (e.g, 404). 3) the renderer is expected not to use the cross-site

109 // document content for purposes other than JS/CSS (e.g, XHR).

110 bool renderable_status_code =

111 IsRenderableStatusCode(resp_data->http_status_code);

112

113 if (renderable_status_code) {

114 IncrementHistogramEnum(

115 bucket_prefix + block_label + ".RenderableStatusCode",

116 resp_data->resource_type, RESOURCE_TYPE_LAST_TYPE);

117 } else {

118 IncrementHistogramCount(bucket_prefix + block_label +

119 ".NonRenderableStatusCode");

120 }

121 }

122

123 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix,

124 bool sniffed_as_js) {

125 IncrementHistogramCount(bucket_prefix + ".NotBlocked");

126 if (sniffed_as_js)

127 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS");

128 }

129

130 } // namespace

131

132 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {

133 }

134

135 void SiteIsolationStatsGatherer::SetEnabled(bool enabled) {

136 g_stats_gathering_enabled = enabled;

137 }

138

139 linked_ptr<SiteIsolationResponseMetaData>

140 SiteIsolationStatsGatherer::OnReceivedResponse(

141 const GURL& frame_origin,

142 const GURL& response_url,

143 ResourceType resource_type,

144 int origin_pid,

145 const ResourceResponseInfo& info) {

146 if (!g_stats_gathering_enabled)

147 return linked_ptr<SiteIsolationResponseMetaData>();

148

149 // if \|origin_pid\| is non-zero, it means that this response is for a plugin

150 // spawned from this renderer process. We exclude responses for plugins for

151 // now, but eventually, we're going to make plugin processes directly talk to

152 // the browser process so that we don't apply cross-site document blocking to

153 // them.

154 if (origin_pid)

155 return linked_ptr<SiteIsolationResponseMetaData>();

156

157 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);

158

159 // See if this is for navigation. If it is, don't block it, under the

160 // assumption that we will put it in an appropriate process.

161 if (IsResourceTypeFrame(resource_type))

162 return linked_ptr<SiteIsolationResponseMetaData>();

163

164 if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url))

165 return linked_ptr<SiteIsolationResponseMetaData>();

166

167 if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url))

168 return linked_ptr<SiteIsolationResponseMetaData>();

169

170 CrossSiteDocumentMimeType canonical_mime_type =

171 CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type);

172

173 if (canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS)

174 return linked_ptr<SiteIsolationResponseMetaData>();

175

176 // Every CORS request should have the Access-Control-Allow-Origin header even

177 // if it is preceded by a pre-flight request. Therefore, if this is a CORS

178 // request, it has this header. response.httpHeaderField() internally uses

179 // case-insensitive matching for the header name.

180 std::string access_control_origin;

181

182 // We can use a case-insensitive header name for EnumerateHeader().

183 info.headers->EnumerateHeader(NULL, "access-control-allow-origin",

184 &access_control_origin);

185 if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet(

186 frame_origin, response_url, access_control_origin))

187 return linked_ptr<SiteIsolationResponseMetaData>();

188

189 // Real XSD data collection starts from here.

190 std::string no_sniff;

191 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);

192

193 linked_ptr<SiteIsolationResponseMetaData> resp_data(

194 new SiteIsolationResponseMetaData);

195 resp_data->frame_origin = frame_origin.spec();

196 resp_data->response_url = response_url;

197 resp_data->resource_type = resource_type;

198 resp_data->canonical_mime_type = canonical_mime_type;

199 resp_data->http_status_code = info.headers->response_code();

200 resp_data->no_sniff = base::LowerCaseEqualsASCII(no_sniff, "nosniff");

201

202 return resp_data;

203 }

204

205 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(

206 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,

207 const char* raw_data,

208 int raw_length) {

209 if (!g_stats_gathering_enabled)

210 return false;

211

212 DCHECK(resp_data.get());

213

214 StringPiece data(raw_data, raw_length);

215

216 // Record the length of the first received chunk of data to see if it's enough

217 // for sniffing.

218 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);

219

220 // Record the number of cross-site document responses with a specific mime

221 // type (text/html, text/xml, etc).

222 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",

223 resp_data->canonical_mime_type,

224 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX);

225

226 // Store the result of cross-site document blocking analysis.

227 bool would_block = false;

228 bool sniffed_as_js = SniffForJS(data);

229

230 // Record the number of responses whose content is sniffed for what its mime

231 // type claims it to be. For example, we apply a HTML sniffer for a document

232 // tagged with text/html here. Whenever this check becomes true, we'll block

233 // the response.

234 if (resp_data->canonical_mime_type != CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN) {

235 std::string bucket_prefix;

236 bool sniffed_as_target_document = false;

237 if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_HTML) {

238 bucket_prefix = "SiteIsolation.XSD.HTML";

239 sniffed_as_target_document =

240 CrossSiteDocumentClassifier::SniffForHTML(data);

241 } else if (resp_data->canonical_mime_type ==

242 CROSS_SITE_DOCUMENT_MIME_TYPE_XML) {

243 bucket_prefix = "SiteIsolation.XSD.XML";

244 sniffed_as_target_document =

245 CrossSiteDocumentClassifier::SniffForXML(data);

246 } else if (resp_data->canonical_mime_type ==

247 CROSS_SITE_DOCUMENT_MIME_TYPE_JSON) {

248 bucket_prefix = "SiteIsolation.XSD.JSON";

249 sniffed_as_target_document =

250 CrossSiteDocumentClassifier::SniffForJSON(data);

251 } else {

252 NOTREACHED() << "Not a blockable mime type: "

253 << resp_data->canonical_mime_type;

254 }

255

256 if (sniffed_as_target_document) {

257 would_block = true;

258 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);

259 } else {

260 if (resp_data->no_sniff) {

261 would_block = true;

262 HistogramCountBlockedResponse(bucket_prefix, resp_data, true);

263 } else {

264 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js);

265 }

266 }

267 } else {

268 // This block is for plain text documents. We apply our HTML, XML,

269 // and JSON sniffer to a text document in the order, and block it

270 // if any of them succeeds in sniffing.

271 std::string bucket_prefix;

272 if (CrossSiteDocumentClassifier::SniffForHTML(data))

273 bucket_prefix = "SiteIsolation.XSD.Plain.HTML";

274 else if (CrossSiteDocumentClassifier::SniffForXML(data))

275 bucket_prefix = "SiteIsolation.XSD.Plain.XML";

276 else if (CrossSiteDocumentClassifier::SniffForJSON(data))

277 bucket_prefix = "SiteIsolation.XSD.Plain.JSON";

278

279 if (bucket_prefix.size() > 0) {

280 would_block = true;

281 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);

282 } else if (resp_data->no_sniff) {

283 would_block = true;

284 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);

285 } else {

286 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",

287 sniffed_as_js);

288 }

289 }

290

291 return would_block;

292 }

293

294 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType(

295 const std::string& mime_type) {

296 if (base::LowerCaseEqualsASCII(mime_type, kTextHtml)) {

297 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML;

298 }

299

300 if (base::LowerCaseEqualsASCII(mime_type, kTextPlain)) {

301 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN;

302 }

303

304 if (base::LowerCaseEqualsASCII(mime_type, kAppJson) \|\|

305 base::LowerCaseEqualsASCII(mime_type, kTextJson) \|\|

306 base::LowerCaseEqualsASCII(mime_type, kTextXjson)) {

307 return CROSS_SITE_DOCUMENT_MIME_TYPE_JSON;

308 }

309

310 if (base::LowerCaseEqualsASCII(mime_type, kTextXml) \|\|

311 base::LowerCaseEqualsASCII(mime_type, xAppRssXml) \|\|

312 base::LowerCaseEqualsASCII(mime_type, kAppXml)) {

313 return CROSS_SITE_DOCUMENT_MIME_TYPE_XML;

314 }

315

316 return CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS;

317 }

318

319 bool CrossSiteDocumentClassifier::IsBlockableScheme(const GURL& url) {

320 // We exclude ftp:// from here. FTP doesn't provide a Content-Type

321 // header which our policy depends on, so we cannot protect any

322 // document from FTP servers.

323 return url.SchemeIs(url::kHttpScheme) \|\| url.SchemeIs(url::kHttpsScheme);

324 }

325

326 bool CrossSiteDocumentClassifier::IsSameSite(const GURL& frame_origin,

327 const GURL& response_url) {

328 if (!frame_origin.is_valid() \|\| !response_url.is_valid())

329 return false;

330

331 if (frame_origin.scheme() != response_url.scheme())

332 return false;

333

334 // SameDomainOrHost() extracts the effective domains (public suffix plus one)

335 // from the two URLs and compare them.

336 return net::registry_controlled_domains::SameDomainOrHost(

337 frame_origin, response_url,

338 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);

339 }

340

341 // We don't use Webkit's existing CORS policy implementation since

342 // their policy works in terms of origins, not sites. For example,

343 // when frame is sub.a.com and it is not allowed to access a document

344 // with sub1.a.com. But under Site Isolation, it's allowed.

345 bool CrossSiteDocumentClassifier::IsValidCorsHeaderSet(

346 const GURL& frame_origin,

347 const GURL& website_origin,

348 const std::string& access_control_origin) {

349 // Many websites are sending back "\"\"" instead of "". This is

350 // non-standard practice, and not supported by Chrome. Refer to

351 // CrossOriginAccessControl::passesAccessControlCheck().

352

353 // TODO(dsjang): * is not allowed for the response from a request

354 // with cookies. This allows for more than what the renderer will

355 // eventually be able to receive, so we won't see illegal cross-site

356 // documents allowed by this. We have to find a way to see if this

357 // response is from a cookie-tagged request or not in the future.

358 if (access_control_origin == "*")

359 return true;

360

361 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for

362 // "*", but many websites are using just a domain for access_control_origin,

363 // and this is blocked by Webkit's CORS logic here :

364 // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set

365 // is_valid() to false when it is created from a URL containing * in the

366 // domain part.

367

368 GURL cors_origin(access_control_origin);

369 return IsSameSite(frame_origin, cors_origin);

370 }

371

372 // This function is a slight modification of \|net::SniffForHTML\|.

373 bool CrossSiteDocumentClassifier::SniffForHTML(StringPiece data) {

374 // The content sniffer used by Chrome and Firefox are using "<!--"

375 // as one of the HTML signatures, but it also appears in valid

376 // JavaScript, considered as well-formed JS by the browser. Since

377 // we do not want to block any JS, we exclude it from our HTML

378 // signatures. This can weaken our document block policy, but we can

379 // break less websites.

380 // TODO(dsjang): parameterize \|net::SniffForHTML\| with an option

381 // that decides whether to include <!-- or not, so that we can

382 // remove this function.

383 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser

384 // process, we should do single-thread checking here for the static

385 // initializer.

386 static const StringPiece kHtmlSignatures[] = {

387 StringPiece("<!DOCTYPE html"), // HTML5 spec

388 StringPiece("<script"), // HTML5 spec, Mozilla

389 StringPiece("<html"), // HTML5 spec, Mozilla

390 StringPiece("<head"), // HTML5 spec, Mozilla

391 StringPiece("<iframe"), // Mozilla

392 StringPiece("<h1"), // Mozilla

393 StringPiece("<div"), // Mozilla

394 StringPiece("<font"), // Mozilla

395 StringPiece("<table"), // Mozilla

396 StringPiece("<a"), // Mozilla

397 StringPiece("<style"), // Mozilla

398 StringPiece("<title"), // Mozilla

399 StringPiece("<b"), // Mozilla

400 StringPiece("<body"), // Mozilla

401 StringPiece("<br"), // Mozilla

402 StringPiece("<p"), // Mozilla

403 StringPiece("<?xml") // Mozilla

404 };

405

406 while (data.length() > 0) {

407 if (MatchesSignature(data, kHtmlSignatures, arraysize(kHtmlSignatures)))

408 return true;

409

410 // If we cannot find "<!--", we fail sniffing this as HTML.

411 static const StringPiece kCommentBegins[] = {StringPiece("<!--")};

412 if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins)))

413 break;

414

415 // Search for --> and do SniffForHTML after that. If we can find the

416 // comment's end, we start HTML sniffing from there again.

417 static const char kEndComment[] = "-->";

418 size_t offset = data.find(kEndComment);

419 if (offset == base::StringPiece::npos)

420 break;

421

422 // Proceed to the index next to the ending comment (-->).

423 data.remove_prefix(offset + strlen(kEndComment));

424 }

425

426 return false;

427 }

428

429 bool CrossSiteDocumentClassifier::SniffForXML(base::StringPiece data) {

430 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for

431 // this signature. However, XML is case-sensitive. Don't we have to

432 // be more lenient only to block documents starting with the exact

433 // string <?xml rather than <?XML ?

434 // TODO(dsjang): Once CrossSiteDocumentClassifier is moved into the browser

435 // process, we should do single-thread checking here for the static

436 // initializer.

437 static const StringPiece kXmlSignatures[] = {StringPiece("<?xml")};

438 return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures));

439 }

440

441 bool CrossSiteDocumentClassifier::SniffForJSON(base::StringPiece data) {

442 // TODO(dsjang): We have to come up with a better way to sniff

443 // JSON. However, even RE cannot help us that much due to the fact

444 // that we don't do full parsing. This DFA starts with state 0, and

445 // finds {, "/' and : in that order. We're avoiding adding a

446 // dependency on a regular expression library.

447 enum {

448 kStartState,

449 kLeftBraceState,

450 kLeftQuoteState,

451 kColonState,

452 kTerminalState,

453 } state = kStartState;

454

455 size_t length = data.length();

456 for (size_t i = 0; i < length && state < kColonState; ++i) {

457 const char c = data[i];

458 if (c == ' ' \|\| c == '\t' \|\| c == '\r' \|\| c == '\n')

459 continue;

460

461 switch (state) {

462 case kStartState:

463 if (c == '{')

464 state = kLeftBraceState;

465 else

466 state = kTerminalState;

467 break;

468 case kLeftBraceState:

469 if (c == '\"' \|\| c == '\'')

470 state = kLeftQuoteState;

471 else

472 state = kTerminalState;

473 break;

474 case kLeftQuoteState:

475 if (c == ':')

476 state = kColonState;

477 break;

478 case kColonState:

479 case kTerminalState:

480 NOTREACHED();

481 break;

482 }

483 }

484 return state == kColonState;

485 }

486

487 bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) {

488 // The purpose of this function is to try to see if there's any possibility

489 // that this data can be JavaScript (superset of JS). Search for "var " for JS

490 // detection. This is a real hack and should only be used for stats gathering.

491 return data.find("var ") != base::StringPiece::npos;

492 }

493

494 } // namespace content

OLD	NEW

« no previous file with comments | « content/child/site_isolation_policy.h ('k') | content/child/site_isolation_policy_browsertest.cc » ('j') | no next file with comments »