content/common/cross_site_document_classifier.cc - Issue 1174323002: [Patch 5 of 6] Split out the site_isolation_policy files

Side by Side Diff: content/common/cross_site_document_classifier.cc

Issue 1174323002: [Patch 5 of 6] Split out the site_isolation_policy files (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@rename_policy_to_sniffer3

Patch Set: Pull Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« content/child/site_isolation_stats_gatherer_browsertest.cc ('K') | « content/common/cross_site_document_classifier.h ('k') | content/common/cross_site_document_classifier_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.
	alexmos 2015/06/13 00:27:03: 2015 | ncarter (slow) 2015/06/16 22:43:22: On 2015/06/13 00:27:03, alexmos wrote: > 2015 — Done.
2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "content/child/site_isolation_policy.h"	5 #include "content/common/cross_site_document_classifier.h"

6	6

7 #include "base/basictypes.h"	7 #include "base/basictypes.h"

8 #include "base/command_line.h"	8 #include "base/command_line.h"

9 #include "base/lazy_instance.h"	9 #include "base/lazy_instance.h"

10 #include "base/logging.h"	10 #include "base/logging.h"

11 #include "base/metrics/histogram.h"	11 #include "base/metrics/histogram.h"

12 #include "base/strings/string_util.h"	12 #include "base/strings/string_util.h"

13 #include "content/public/common/content_switches.h"	13 #include "content/public/common/content_switches.h"

14 #include "content/public/common/resource_response_info.h"	14 #include "content/public/common/resource_response_info.h"

15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"	15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"

16 #include "net/http/http_response_headers.h"	16 #include "net/http/http_response_headers.h"

17	17

18 using base::StringPiece;	18 using base::StringPiece;

19	19

20 namespace content {	20 namespace content {

21	21

22 namespace {	22 namespace {

23	23

24 // The gathering of UMA stats for site isolation is deactivated by default, and

25 // only activated in renderer processes.

26 static bool g_stats_gathering_enabled = false;

27

28 // MIME types	24 // MIME types

29 const char kTextHtml[] = "text/html";	25 const char kTextHtml[] = "text/html";

30 const char kTextXml[] = "text/xml";	26 const char kTextXml[] = "text/xml";

31 const char xAppRssXml[] = "application/rss+xml";	27 const char xAppRssXml[] = "application/rss+xml";

32 const char kAppXml[] = "application/xml";	28 const char kAppXml[] = "application/xml";

33 const char kAppJson[] = "application/json";	29 const char kAppJson[] = "application/json";

34 const char kTextJson[] = "text/json";	30 const char kTextJson[] = "text/json";

35 const char kTextXjson[] = "text/x-json";	31 const char kTextXjson[] = "text/x-json";

36 const char kTextPlain[] = "text/plain";	32 const char kTextPlain[] = "text/plain";

37	33

38 // TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted

39 // when this class is used for actual blocking.

40 bool IsRenderableStatusCode(int status_code) {

41 // Chrome only uses the content of a response with one of these status codes

42 // for CSS/JavaScript. For images, Chrome just ignores status code.

43 const int renderable_status_code[] = {

44 200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307};

45 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {

46 if (renderable_status_code[i] == status_code)

47 return true;

48 }

49 return false;

50 }

51

52 bool MatchesSignature(StringPiece data,	34 bool MatchesSignature(StringPiece data,

53 const StringPiece signatures[],	35 const StringPiece signatures[],

54 size_t arr_size) {	36 size_t arr_size) {

55 size_t offset = data.find_first_not_of(" \t\r\n");	37 size_t offset = data.find_first_not_of(" \t\r\n");

56 // There is no non-whitespace character in this document.	38 // There is no non-whitespace character in this document.

57 if (offset == base::StringPiece::npos)	39 if (offset == base::StringPiece::npos)

58 return false;	40 return false;

59	41

60 data.remove_prefix(offset);	42 data.remove_prefix(offset);

61 size_t length = data.length();	43 size_t length = data.length();

62	44

63 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {	45 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {

64 const StringPiece& signature = signatures[sig_index];	46 const StringPiece& signature = signatures[sig_index];

65 size_t signature_length = signature.length();	47 size_t signature_length = signature.length();

66 if (length < signature_length)	48 if (length < signature_length)

67 continue;	49 continue;

68	50

69 if (LowerCaseEqualsASCII(data.begin(), data.begin() + signature_length,	51 if (LowerCaseEqualsASCII(data.begin(), data.begin() + signature_length,

70 signature.data()))	52 signature.data()))

71 return true;	53 return true;

72 }	54 }

73 return false;	55 return false;

74 }	56 }

75	57

76 void IncrementHistogramCount(const std::string& name) {

77 // The default value of min, max, bucket_count are copied from histogram.h.

78 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(

79 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);

80 histogram_pointer->Add(1);

81 }

82

83 void IncrementHistogramEnum(const std::string& name,

84 uint32 sample,

85 uint32 boundary_value) {

86 // The default value of min, max, bucket_count are copied from histogram.h.

87 base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet(

88 name, 1, boundary_value, boundary_value + 1,

89 base::HistogramBase::kUmaTargetedHistogramFlag);

90 histogram_pointer->Add(sample);

91 }

92

93 void HistogramCountBlockedResponse(

94 const std::string& bucket_prefix,

95 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,

96 bool nosniff_block) {

97 std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked");

98 IncrementHistogramCount(bucket_prefix + block_label);

99

100 // The content is blocked if it is sniffed as HTML/JSON/XML. When

101 // the blocked response is with an error status code, it is not

102 // disruptive for the following reasons : 1) the blocked content is

103 // not a binary object (such as an image) since it is sniffed as

104 // text; 2) then, this blocking breaks the renderer behavior

105 // only if it is either JavaScript or CSS. However, the renderer

106 // doesn't use the contents of JS/CSS with unaffected status code

107 // (e.g, 404). 3) the renderer is expected not to use the cross-site

108 // document content for purposes other than JS/CSS (e.g, XHR).

109 bool renderable_status_code =

110 IsRenderableStatusCode(resp_data->http_status_code);

111

112 if (renderable_status_code) {

113 IncrementHistogramEnum(

114 bucket_prefix + block_label + ".RenderableStatusCode",

115 resp_data->resource_type, RESOURCE_TYPE_LAST_TYPE);

116 } else {

117 IncrementHistogramCount(bucket_prefix + block_label +

118 ".NonRenderableStatusCode");

119 }

120 }

121

122 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix,

123 bool sniffed_as_js) {

124 IncrementHistogramCount(bucket_prefix + ".NotBlocked");

125 if (sniffed_as_js)

126 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS");

127 }

128

129 } // namespace	58 } // namespace

130	59

131 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {

132 }

133

134 void SiteIsolationStatsGatherer::SetEnabled(bool enabled) {

135 g_stats_gathering_enabled = enabled;

136 }

137

138 linked_ptr<SiteIsolationResponseMetaData>

139 SiteIsolationStatsGatherer::OnReceivedResponse(

140 const GURL& frame_origin,

141 const GURL& response_url,

142 ResourceType resource_type,

143 int origin_pid,

144 const ResourceResponseInfo& info) {

145 if (!g_stats_gathering_enabled)

146 return linked_ptr<SiteIsolationResponseMetaData>();

147

148 // if |origin_pid| is non-zero, it means that this response is for a plugin

149 // spawned from this renderer process. We exclude responses for plugins for

150 // now, but eventually, we're going to make plugin processes directly talk to

151 // the browser process so that we don't apply cross-site document blocking to

152 // them.

153 if (origin_pid)

154 return linked_ptr<SiteIsolationResponseMetaData>();

155

156 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);

157

158 // See if this is for navigation. If it is, don't block it, under the

159 // assumption that we will put it in an appropriate process.

160 if (IsResourceTypeFrame(resource_type))

161 return linked_ptr<SiteIsolationResponseMetaData>();

162

163 if (!CrossSiteDocumentClassifier::IsBlockableScheme(response_url))

164 return linked_ptr<SiteIsolationResponseMetaData>();

165

166 if (CrossSiteDocumentClassifier::IsSameSite(frame_origin, response_url))

167 return linked_ptr<SiteIsolationResponseMetaData>();

168

169 CrossSiteDocumentMimeType canonical_mime_type =

170 CrossSiteDocumentClassifier::GetCanonicalMimeType(info.mime_type);

171

172 if (canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_OTHERS)

173 return linked_ptr<SiteIsolationResponseMetaData>();

174

175 // Every CORS request should have the Access-Control-Allow-Origin header even

176 // if it is preceded by a pre-flight request. Therefore, if this is a CORS

177 // request, it has this header. response.httpHeaderField() internally uses

178 // case-insensitive matching for the header name.

179 std::string access_control_origin;

180

181 // We can use a case-insensitive header name for EnumerateHeader().

182 info.headers->EnumerateHeader(NULL, "access-control-allow-origin",

183 &access_control_origin);

184 if (CrossSiteDocumentClassifier::IsValidCorsHeaderSet(

185 frame_origin, response_url, access_control_origin))

186 return linked_ptr<SiteIsolationResponseMetaData>();

187

188 // Real XSD data collection starts from here.

189 std::string no_sniff;

190 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);

191

192 linked_ptr<SiteIsolationResponseMetaData> resp_data(

193 new SiteIsolationResponseMetaData);

194 resp_data->frame_origin = frame_origin.spec();

195 resp_data->response_url = response_url;

196 resp_data->resource_type = resource_type;

197 resp_data->canonical_mime_type = canonical_mime_type;

198 resp_data->http_status_code = info.headers->response_code();

199 resp_data->no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff");

200

201 return resp_data;

202 }

203

204 bool SiteIsolationStatsGatherer::OnReceivedFirstChunk(

205 const linked_ptr<SiteIsolationResponseMetaData>& resp_data,

206 const char* raw_data,

207 int raw_length) {

208 if (!g_stats_gathering_enabled)

209 return false;

210

211 DCHECK(resp_data.get());

212

213 StringPiece data(raw_data, raw_length);

214

215 // Record the length of the first received chunk of data to see if it's enough

216 // for sniffing.

217 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);

218

219 // Record the number of cross-site document responses with a specific mime

220 // type (text/html, text/xml, etc).

221 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType",

222 resp_data->canonical_mime_type,

223 CROSS_SITE_DOCUMENT_MIME_TYPE_MAX);

224

225 // Store the result of cross-site document blocking analysis.

226 bool would_block = false;

227 bool sniffed_as_js = SniffForJS(data);

228

229 // Record the number of responses whose content is sniffed for what its mime

230 // type claims it to be. For example, we apply a HTML sniffer for a document

231 // tagged with text/html here. Whenever this check becomes true, we'll block

232 // the response.

233 if (resp_data->canonical_mime_type != CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN) {

234 std::string bucket_prefix;

235 bool sniffed_as_target_document = false;

236 if (resp_data->canonical_mime_type == CROSS_SITE_DOCUMENT_MIME_TYPE_HTML) {

237 bucket_prefix = "SiteIsolation.XSD.HTML";

238 sniffed_as_target_document =

239 CrossSiteDocumentClassifier::SniffForHTML(data);

240 } else if (resp_data->canonical_mime_type ==

241 CROSS_SITE_DOCUMENT_MIME_TYPE_XML) {

242 bucket_prefix = "SiteIsolation.XSD.XML";

243 sniffed_as_target_document =

244 CrossSiteDocumentClassifier::SniffForXML(data);

245 } else if (resp_data->canonical_mime_type ==

246 CROSS_SITE_DOCUMENT_MIME_TYPE_JSON) {

247 bucket_prefix = "SiteIsolation.XSD.JSON";

248 sniffed_as_target_document =

249 CrossSiteDocumentClassifier::SniffForJSON(data);

250 } else {

251 NOTREACHED() << "Not a blockable mime type: "

252 << resp_data->canonical_mime_type;

253 }

254

255 if (sniffed_as_target_document) {

256 would_block = true;

257 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);

258 } else {

259 if (resp_data->no_sniff) {

260 would_block = true;

261 HistogramCountBlockedResponse(bucket_prefix, resp_data, true);

262 } else {

263 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js);

264 }

265 }

266 } else {

267 // This block is for plain text documents. We apply our HTML, XML,

268 // and JSON sniffers to a text document in that order, and block it

269 // if any of them succeeds in sniffing.

270 std::string bucket_prefix;

271 if (CrossSiteDocumentClassifier::SniffForHTML(data))

272 bucket_prefix = "SiteIsolation.XSD.Plain.HTML";

273 else if (CrossSiteDocumentClassifier::SniffForXML(data))

274 bucket_prefix = "SiteIsolation.XSD.Plain.XML";

275 else if (CrossSiteDocumentClassifier::SniffForJSON(data))

276 bucket_prefix = "SiteIsolation.XSD.Plain.JSON";

277

278 if (bucket_prefix.size() > 0) {

279 would_block = true;

280 HistogramCountBlockedResponse(bucket_prefix, resp_data, false);

281 } else if (resp_data->no_sniff) {

282 would_block = true;

283 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);

284 } else {

285 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",

286 sniffed_as_js);

287 }

288 }

289

290 return would_block;

291 }

292

293 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType(	60 CrossSiteDocumentMimeType CrossSiteDocumentClassifier::GetCanonicalMimeType(

294 const std::string& mime_type) {	61 const std::string& mime_type) {

295 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {	62 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {

296 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML;	63 return CROSS_SITE_DOCUMENT_MIME_TYPE_HTML;

297 }	64 }

298	65

299 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {	66 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {

300 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN;	67 return CROSS_SITE_DOCUMENT_MIME_TYPE_PLAIN;

301 }	68 }

302	69

(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
476 break;	243 break;

477 case kColonState:	244 case kColonState:

478 case kTerminalState:	245 case kTerminalState:

479 NOTREACHED();	246 NOTREACHED();

480 break;	247 break;

481 }	248 }

482 }	249 }

483 return state == kColonState;	250 return state == kColonState;

484 }	251 }

485	252

486 bool SiteIsolationStatsGatherer::SniffForJS(StringPiece data) {

487 // The purpose of this function is to try to see if there's any possibility

488 // that this data can be JavaScript (superset of JS). Search for "var " for JS

489 // detection. This is a real hack and should only be used for stats gathering.

490 return data.find("var ") != base::StringPiece::npos;

491 }

492

493 } // namespace content	253 } // namespace content

OLD	NEW