OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "webkit/child/site_isolation_policy.h" | |
6 | |
7 #include "base/basictypes.h" | |
8 #include "base/logging.h" | |
9 #include "base/metrics/histogram.h" | |
10 #include "base/strings/string_util.h" | |
11 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" | |
12 #include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h" | |
13 #include "third_party/WebKit/public/platform/WebString.h" | |
14 #include "third_party/WebKit/public/platform/WebURL.h" | |
15 #include "third_party/WebKit/public/platform/WebURLRequest.h" | |
16 #include "third_party/WebKit/public/platform/WebURLResponse.h" | |
17 #include "third_party/WebKit/public/web/WebDocument.h" | |
18 #include "third_party/WebKit/public/web/WebFrame.h" | |
19 #include "third_party/WebKit/public/web/WebFrameClient.h" | |
20 #include "third_party/WebKit/public/web/WebSecurityOrigin.h" | |
21 | |
22 using base::strncasecmp; | |
23 using WebKit::WebDocument; | |
24 using WebKit::WebString; | |
25 using WebKit::WebURL; | |
26 using WebKit::WebURLResponse; | |
27 using WebKit::WebURLRequest; | |
28 | |
29 | |
30 namespace webkit_glue { | |
31 | |
// Storage for the static bookkeeping maps of SiteIsolationPolicy.

// Request identifier -> target type, recorded by WillSendRequest().
std::map<unsigned, WebURLRequest::TargetType>
    SiteIsolationPolicy::id_target_map_;
// Response URL spec -> metadata stored by DidReceiveResponse() for responses
// that are candidates for blocking.
std::map<std::string, ResponseMetaData>
    SiteIsolationPolicy::url_responsedata_map_;
// Request identifier -> response URL spec, stored by DidReceiveResponse()
// alongside the |url_responsedata_map_| entry.
std::map<unsigned, std::string> SiteIsolationPolicy::id_url_map_;
37 | |
38 void SiteIsolationPolicy::WillSendRequest( | |
39 unsigned identifier, | |
40 WebURLRequest::TargetType target_type) { | |
41 // When identifier already exists in the map, it means that this | |
42 // request has been redirected to issue another request. We don't | |
43 // overwrite the existing target_type since it becomes | |
44 // TargetIsSubresource no matter what the original target_type was. | |
45 if (!id_target_map_.count(identifier)) | |
46 id_target_map_[identifier] = target_type; | |
47 } | |
48 | |
49 void SiteIsolationPolicy::DidReceiveResponse(WebKit::WebFrame* frame, | |
50 unsigned identifier, | |
51 const WebURLResponse& response) { | |
52 DCHECK_EQ(id_target_map_.count(identifier),1U); | |
53 | |
54 UMA_HISTOGRAM_COUNTS("SiteIsolation.ALL", 1); | |
Charlie Reis
2013/08/09 18:48:38
ALL -> AllResponses
nasko
2013/08/09 19:07:28
nit: TOTAL?
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
AllResponses seems more informative than TOTAL to
| |
55 | |
56 GURL response_url = response.url(); | |
57 WebURLRequest::TargetType target_type = id_target_map_[identifier]; | |
58 id_target_map_.erase(identifier); | |
59 | |
60 // See if this is for navigation. If it is, don't block it, under | |
61 // the assumption that we will put it in an appropriate process. | |
62 if (IsFrameNavigating(frame)) { | |
63 LOG(INFO) << "SiteIsolationPolicy.FrameInNavigation"; | |
64 return; | |
65 } | |
66 | |
67 GURL frame_origin(frame->document().securityOrigin().toString()); | |
68 | |
69 if (!IsBlockableScheme(frame_origin)) { | |
70 LOG(INFO) << "SiteIsolationPolicy.NotNetworkScheme:" << frame_origin; | |
71 return; | |
72 } | |
73 | |
74 if (IsSameSite(frame_origin, response_url)) { | |
75 LOG(INFO) << "SiteIsolationPolicy.SameSite:" << frame_origin << "," | |
76 << response_url; | |
77 return; | |
78 } | |
79 | |
80 ResponseMetaData::CanonicalMimeType canonical_mime_type = | |
81 GetCanonicalMimeType(response); | |
82 | |
83 if (canonical_mime_type == ResponseMetaData::Others) { | |
84 LOG(INFO) << "SiteIsolationPolicy.mimetype:" << frame_origin << "," | |
85 << response_url << ",[" << response.mimeType().utf8() << "]"; | |
86 return; | |
87 } | |
88 | |
89 // Every CORS request should have the Access-Control-Allow-Origin | |
90 // header even if it is preceded by a pre-flight request. Therefore, | |
91 // if this is a CORS request, it has this header. | |
92 std::string access_control_origin = response.httpHeaderField( | |
93 WebKit::WebString::fromUTF8("Access-Control-Allow-Origin")).utf8(); | |
94 | |
95 if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) { | |
96 LOG(INFO) << "SiteIsolationPolicy.CorsIsSafe:"; | |
97 return; | |
98 } | |
99 | |
100 // Real XSD data collection starts from here. | |
101 LOG(INFO) << "SiteIsolationPolicy.XSD:from header:" << canonical_mime_type << | |
102 ":" << response_url; | |
103 | |
104 // TODO(dsjang): Apply X-Content-Type option here. | |
105 ResponseMetaData resp_data; | |
106 resp_data.frame_origin = frame_origin.spec(); | |
107 resp_data.response_url = response_url.spec(); | |
108 resp_data.request_identifier = identifier; | |
109 resp_data.target_type = target_type; | |
110 resp_data.canonical_mime_type = canonical_mime_type; | |
111 resp_data.http_status_code = response.httpStatusCode(); | |
112 | |
113 url_responsedata_map_[resp_data.response_url] = resp_data; | |
114 id_url_map_[identifier] = resp_data.response_url; | |
115 | |
116 return; | |
117 } | |
118 | |
// Histogram helpers for DidReceiveData(). BUCKET_PREFIX must be a string
// literal: it is pasted together with a literal suffix to form the histogram
// name. The macros read the following locals of the calling function:
//   ok_status_code     - bool, used by COUNT_BLOCK;
//   resp_data          - ResponseMetaData, used by COUNT_BLOCK;
//   is_sniffed_for_js  - bool, used by COUNT_NOTBLOCK.
//
// Each macro is wrapped in do { } while (0) so that it expands to exactly one
// statement and must be followed by a semicolon at the call site. The spaces
// around BUCKET_PREFIX are required: in C++11 a string literal immediately
// followed by an identifier is lexed as a user-defined-literal suffix, which
// would prevent the macro parameter from being substituted.

// Records that a response in bucket BUCKET_PREFIX would be blocked, split by
// whether its HTTP status code is one whose content the renderer uses.
#define SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX) \
  do { \
    UMA_HISTOGRAM_COUNTS("" BUCKET_PREFIX ".Blocked", 1); \
    if (ok_status_code) { \
      UMA_HISTOGRAM_ENUMERATION( \
          "" BUCKET_PREFIX ".Blocked.OKStatusCode", \
          resp_data.target_type, \
          WebURLRequest::TargetIsUnspecified + 1); \
    } else { \
      UMA_HISTOGRAM_COUNTS("" BUCKET_PREFIX ".Blocked.ErrorStatusCode", 1); \
    } \
  } while (0)

// Records that a response in bucket BUCKET_PREFIX would not be blocked, and
// additionally whether its body looked like JavaScript.
#define SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX) \
  do { \
    UMA_HISTOGRAM_COUNTS("" BUCKET_PREFIX ".NotBlocked", 1); \
    if (is_sniffed_for_js) \
      UMA_HISTOGRAM_COUNTS("" BUCKET_PREFIX ".NotBlocked.MaybeJS", 1); \
  } while (0)

// Evaluates SNIFF_EXPR once and records the response as blocked when it is
// true, or as not blocked otherwise.
#define SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SNIFF_EXPR, BUCKET_PREFIX) \
  do { \
    if (SNIFF_EXPR) { \
      SITE_ISOLATION_POLICY_COUNT_BLOCK(BUCKET_PREFIX); \
    } else { \
      SITE_ISOLATION_POLICY_COUNT_NOTBLOCK(BUCKET_PREFIX); \
    } \
  } while (0)
141 | |
142 void SiteIsolationPolicy::DidReceiveData(const char* data, | |
143 int length, | |
144 WebURL& web_response_url) { | |
145 // We only record XSDs whose content is actually non-zero. | |
Charlie Reis
2013/08/09 18:48:38
Make sure XSDs is spelled out somewhere (perhaps i
nasko
2013/08/09 19:07:28
How do we know the content is non-zero based on th
dsjang
2013/08/12 22:56:17
This means that we can't see zero responses here.
dsjang
2013/08/12 22:56:17
We don't know. What I meant was that DidReceiveDat
| |
146 GURL response_url(web_response_url); | |
147 | |
148 std::string response_url_str = response_url.spec(); | |
149 if (url_responsedata_map_.count(response_url_str) == 0) | |
150 return; | |
151 | |
152 DCHECK_EQ(url_responsedata_map_.count(response_url_str), 1U); | |
153 ResponseMetaData resp_data = url_responsedata_map_[response_url_str]; | |
154 url_responsedata_map_.erase(response_url_str); | |
155 | |
156 // Record the length of the first received network packet to see if | |
157 // it's enough for sniffing. | |
158 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", length); | |
159 | |
160 // Record the entire number of responses with a specific mime | |
Charlie Reis
2013/08/09 18:48:38
number of XSD reponses
dsjang
2013/08/12 22:56:17
Done.
| |
161 // type(text/html, text/xml, etc). | |
Charlie Reis
2013/08/09 18:48:38
nit: space after type
dsjang
2013/08/12 22:56:17
Done.
| |
162 UMA_HISTOGRAM_ENUMERATION("SiteIsolation.XSD.MimeType", | |
163 resp_data.canonical_mime_type, | |
164 ResponseMetaData::MaxCanonicalMimeType); | |
165 | |
166 // Blocking only happens when the content is sniffed for | |
167 // HTML/JSON/XML. So if the status code is an error status code, it | |
Charlie Reis
2013/08/09 18:48:38
it -> blocking it
dsjang
2013/08/12 22:56:17
Done.
| |
168 // is not disruptive by the following reasons : 1) the blocked | |
169 // content is not a binary object (such as an image) since it is | |
170 // sniffed as a text document. 2) then, this blocking only breaks | |
Charlie Reis
2013/08/09 18:48:38
nit: Either capitalize each sentence or use a semi
dsjang
2013/08/12 22:56:17
Done.
| |
171 // the renderer behavior only if it is either JavaScript or | |
172 // CSS. However, the renderer doesn't use the contents of JS/CSS | |
173 // with unaffected status code(e.g, 404). *) the renderer is | |
Charlie Reis
2013/08/09 18:48:38
nit: space after code
Also, why "*)" rather than "
| |
174 // expected not to use the cross-site document content for purposes | |
175 // other than JS/CSS (e.g, XHR). | |
176 bool ok_status_code = !IsErrorStatusCode(resp_data.http_status_code); | |
177 | |
178 // This is only used for measuring false-negative analysis for | |
Charlie Reis
2013/08/09 18:48:38
nit: "measuring" and "analysis" are redundant here
dsjang
2013/08/12 22:56:17
Done.
| |
179 // non-blocked resources. | |
180 bool is_sniffed_for_js = SniffForJS(data, length); | |
181 | |
182 // Record the number of responses whose content is sniffed for what | |
183 // its mime type claims it to be. For example, we apply a HTML | |
184 // sniffer for a document tagged with text/html here. Whenever this | |
185 // check becomes true, we'll block the response. | |
186 switch (resp_data.canonical_mime_type) { | |
187 case ResponseMetaData::HTML: | |
188 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForHTML(data, length), | |
189 "SiteIsolation.XSD.MimeType.HTML"); | |
Charlie Reis
2013/08/09 18:48:38
I don't think we need "MimeType" in the stat name
dsjang
2013/08/12 22:56:17
Done.
| |
190 break; | |
191 case ResponseMetaData::XML: | |
192 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForXML(data, length), | |
193 "SiteIsolation.XSD.MimeType.XML"); | |
194 break; | |
195 case ResponseMetaData::JSON: | |
196 SITE_ISOLATION_POLICY_SNIFF_AND_COUNT(SniffForJSON(data, length), | |
197 "SiteIsolation.XSD.MimeType.JSON"); | |
198 break; | |
199 case ResponseMetaData::Plain: | |
200 if (SniffForHTML(data, length)) { | |
201 SITE_ISOLATION_POLICY_COUNT_BLOCK( | |
202 "SiteIsolation.XSD.MimeType.Plain.HTML"); | |
203 } else if (SniffForXML(data, length)) { | |
204 SITE_ISOLATION_POLICY_COUNT_BLOCK( | |
205 "SiteIsolation.XSD.MimeType.Plain.XML"); | |
206 } else if (SniffForJSON(data, length)) { | |
207 SITE_ISOLATION_POLICY_COUNT_BLOCK( | |
208 "SiteIsolation.XSD.MimeType.Plain.JSON"); | |
209 } else if (is_sniffed_for_js) { | |
210 SITE_ISOLATION_POLICY_COUNT_NOTBLOCK( | |
211 "SiteIsolation.XSD.MimeType.Plain"); | |
212 } | |
213 break; | |
214 default : | |
215 DCHECK(false); | |
Charlie Reis
2013/08/09 18:48:38
Use NOTREACHED() instead.
nasko
2013/08/09 19:07:28
nit: You can add << "message" to clarify why this
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
This is very useful. Thanks for letting me know th
| |
216 break; | |
217 } | |
218 } | |
219 | |
// The histogram helper macros are private to DidReceiveData(); undefine them
// in the reverse order of their definition.
#undef SITE_ISOLATION_POLICY_SNIFF_AND_COUNT
#undef SITE_ISOLATION_POLICY_COUNT_NOTBLOCK
#undef SITE_ISOLATION_POLICY_COUNT_BLOCK
Charlie Reis
2013/08/09 18:48:38
Probably should undef these in reverse order, sinc
dsjang
2013/08/12 22:56:17
Done.
| |
223 | |
224 void SiteIsolationPolicy::DidFinishResourceLoad(unsigned identifier) { | |
225 id_target_map_.erase(identifier); | |
226 if (!id_url_map_.count(identifier)) { | |
227 url_responsedata_map_.erase(id_url_map_[identifier]); | |
228 id_url_map_.erase(identifier); | |
229 } | |
230 } | |
231 | |
232 void SiteIsolationPolicy::DidFinishResourceLoadForUrl( | |
233 const WebKit::WebURL& web_response_url) { | |
234 GURL response_url(web_response_url); | |
235 | |
236 if (!url_responsedata_map_.count(response_url.spec())) { | |
237 ResponseMetaData meta_data = url_responsedata_map_[response_url.spec()]; | |
238 url_responsedata_map_.erase(response_url.spec()); | |
239 id_target_map_.erase(meta_data.request_identifier); | |
240 id_url_map_.erase(meta_data.request_identifier); | |
241 } | |
242 } | |
243 | |
244 ResponseMetaData::CanonicalMimeType SiteIsolationPolicy::GetCanonicalMimeType( | |
245 const WebURLResponse& response) { | |
246 static const char TEXT_HTML[] = "text/html"; | |
Charlie Reis
2013/08/09 18:48:38
Constants should be formatted as kTextHtml, etc.
dsjang
2013/08/12 22:56:17
Done.
| |
247 static const char TEXT_XML[] = "text/xml"; | |
248 static const char APP_RSS_XML[] = "application/rss+xml"; | |
249 static const char APP_XML[] = "application/xml"; | |
250 static const char APP_JSON[] = "application/json"; | |
251 static const char TEXT_XJSON[] = "text/x-json"; | |
252 static const char TEXT_JSON[] = "text/json"; | |
253 static const char TEXT_PLAIN[] = "text/json"; | |
Charlie Reis
2013/08/09 18:48:38
Shouldn't this be text/plain?
dsjang
2013/08/12 22:56:17
Done.
| |
254 | |
255 const std::string mime_type = response.mimeType().utf8(); | |
256 | |
257 LOG(ERROR) << "mimetype:" << mime_type << "==[" << TEXT_HTML << "]"; | |
nasko
2013/08/09 19:07:28
Why is this TEXT_HTML there? Wouldn't it always pr
dsjang
2013/08/12 22:56:17
Done.
| |
258 | |
259 // These are a thorough list of the mime types crawled over the top | |
260 // 50k sites related to HTML, XML, JSON, Plain. | |
Charlie Reis
2013/08/09 18:48:38
This comment belongs above your constants.
dsjang
2013/08/12 22:56:17
Done.
| |
261 if (LowerCaseEqualsASCII(mime_type, TEXT_HTML)) { | |
262 return ResponseMetaData::HTML; | |
263 } else if (LowerCaseEqualsASCII(mime_type, TEXT_XML) || | |
nasko
2013/08/09 19:07:28
nit: Have you ordered these in order of decreasing
dsjang
2013/08/12 22:56:17
Done.
| |
264 LowerCaseEqualsASCII(mime_type, APP_RSS_XML) || | |
265 LowerCaseEqualsASCII(mime_type, APP_XML)) { | |
266 return ResponseMetaData::XML; | |
267 } else if (LowerCaseEqualsASCII(mime_type, APP_JSON) || | |
268 LowerCaseEqualsASCII(mime_type, TEXT_XJSON) || | |
269 LowerCaseEqualsASCII(mime_type, TEXT_JSON)) { | |
270 return ResponseMetaData::JSON; | |
271 } else if (LowerCaseEqualsASCII(mime_type, TEXT_PLAIN)) { | |
272 return ResponseMetaData::Plain; | |
273 } else { | |
274 return ResponseMetaData::Others; | |
275 } | |
276 } | |
277 | |
278 bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) { | |
279 // We exclude ftp:// from here. FTP doesn't provide a Content-Type | |
280 // header which our policy depends on, so we cannot protect any | |
281 // document from FTP servers. | |
282 return url.SchemeIs("http") || url.SchemeIs("https"); | |
283 } | |
284 | |
285 bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin, | |
Charlie Reis
2013/08/09 18:48:38
This seems like it should be modeled more after Si
| |
286 const GURL& response_url) { | |
287 if (frame_origin.scheme() != response_url.scheme()) | |
288 return false; | |
289 | |
290 // Extract the effective domains (public suffix plus one) of the | |
291 // urls. | |
292 | |
293 // TODO(dsjang): Is there any reason why we don't use | |
nasko
2013/08/09 19:07:28
This will be good to iron out before we commit the
dsjang
2013/08/12 22:56:17
Done.
| |
294 // net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES | |
295 // instead of | |
296 // net::registry_controlled_domains::EXCLUSE_PRIVATE_REGISTRIES? If | |
297 // we allow sites to use their private registries, they can use | |
298 // "finer grained" sites than only using public ones. | |
Charlie Reis
2013/08/09 18:48:38
Just change this to be a TODO to use INCLUDE_PRIVA
dsjang
2013/08/12 22:56:17
Done.
| |
299 std::string frame_domain = | |
300 net::registry_controlled_domains::GetDomainAndRegistry( | |
301 frame_origin, | |
302 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | |
303 std::string response_domain = | |
304 net::registry_controlled_domains::GetDomainAndRegistry( | |
305 response_url, | |
306 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | |
307 | |
308 return frame_domain == response_domain; | |
309 } | |
310 | |
311 bool SiteIsolationPolicy::IsFrameNavigating(WebKit::WebFrame* frame) { | |
312 // When a navigation starts, frame->provisionalDataSource() is set | |
313 // to a not-null value which stands for the request made for the | |
314 // navigation. As soon as the network request is committed to the | |
315 // frame, frame->provisionalDataSource() is converted to null, and | |
316 // the committed data source is moved to frame->dataSource(). This | |
317 // is the most reliable way to detect whether the frame is in | |
318 // navigation or not by far. | |
Charlie Reis
2013/08/09 18:48:38
nit: drop "by far"
nasko
2013/08/09 19:07:28
Awesome comment! Thank you! I've learned something
dsjang
2013/08/12 22:56:17
Thank you here! I'm so glad that you got something
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
Done.
| |
319 return frame->provisionalDataSource() != NULL; | |
320 } | |
321 | |
322 bool SiteIsolationPolicy::IsValidCorsHeaderSet( | |
323 GURL& frame_origin, | |
324 GURL& website_origin, | |
325 std::string access_control_origin) { | |
326 | |
327 size_t access_control_origin_len = access_control_origin.size(); | |
328 | |
329 // TODO(dsjang): Is this actually true? The server seems to return | |
Charlie Reis
2013/08/09 18:48:38
Can we resolve this TODO?
dsjang
2013/08/12 22:56:17
third_party/WebKit/Source/core/loader/CrossOriginA
| |
330 // an empty string or "null". | |
331 if (access_control_origin_len == 0) | |
332 return false; | |
333 | |
334 // Many websites are sending back "\"*\"" instead of "*". This is | |
335 // non-standard practice, and seems not supported by the | |
336 // brwoser. Refer to | |
337 // CrossOriginAccessControl::passesAccessControlCheck(). | |
338 | |
339 // TODO(dsjang): * is not allowed for the response from a request | |
340 // with cookies. This allows for more than what the renderer will | |
341 // eventually be able to receive, so we won't see illegal cross-site | |
342 // documents alllowed by this. We have to have t a way to see if | |
Charlie Reis
2013/08/09 18:48:38
typos: alllowed, t
nasko
2013/08/09 19:07:28
nit: Not sure what "t a way" stands for. To find a
dsjang
2013/08/12 22:56:17
Done.
dsjang
2013/08/12 22:56:17
Done.
| |
343 // this response is from a cookie-tagged request or not in the | |
344 // future. | |
345 if (access_control_origin == "*") | |
346 return true; | |
347 | |
348 // TODO(dsjang): The CORS spec only treats a fully specified URL, | |
349 // except for "*", but many websites are using just a domain for | |
350 // access_control_origin, and this is blocked by Webkit's CORS logic | |
351 // here : CrossOriginAccessControl::passesAccessControlCheck() | |
352 | |
353 // We don't use Webkit's existing CORS policy implementation since | |
354 // their policy works in terms of origins, not sites. For | |
355 // example, when frame is sub.a.com and it is not allowed to access | |
356 // a document with sub1.a.com. But under Site Isolation, it's | |
357 // allowed. | |
Charlie Reis
2013/08/09 18:48:38
Please move this comment to the top of this functi
dsjang
2013/08/12 22:56:17
Done.
| |
358 | |
359 // TODO(dsjang): examine createFromString()'s behavior for a URL | |
360 // containing * in it. | |
361 WebKit::WebSecurityOrigin cors_security_origin = | |
Charlie Reis
2013/08/09 18:48:38
Why send this through WebSecurityOrigin if we're j
dsjang
2013/08/12 22:56:17
Done.
| |
362 WebKit::WebSecurityOrigin::createFromString( | |
363 WebKit::WebString::fromUTF8(access_control_origin)); | |
364 GURL cors_origin(cors_security_origin.toString().utf8()); | |
365 | |
366 LOG(ERROR) << cors_security_origin.toString().utf8(); | |
Charlie Reis
2013/08/09 18:48:38
We'll need to get rid of all the log statements be
dsjang
2013/08/12 22:56:17
Sure. I'll do that when the CL is ready for commit
| |
367 return IsSameSite(frame_origin, cors_origin); | |
368 } | |
369 | |
370 bool SiteIsolationPolicy::SniffForHTML(const char* data, size_t length) { | |
Charlie Reis
2013/08/09 18:48:38
Can you put a comment somewhere saying which funct
dsjang
2013/08/12 22:56:17
Done.
| |
371 // TODO(dsjang): The content sniffer used by Chrome and Firefox are | |
Charlie Reis
2013/08/09 18:48:38
This part of the comment isn't a TODO. However, i
dsjang
2013/08/12 22:56:17
Done.
| |
372 // using "<!--" as one of the HTML signatures, but it also appears | |
373 // in valid JavaScript, considered as well-formed JS by the browser. | |
374 // Since we do not want to block any JS, we exclude it from our HTML | |
375 // signatures. This can weaken our document block policy, but we can | |
376 // break less websites. | |
377 const char* html_signatures[] = {"<!DOCTYPE html", // HTML5 spec | |
378 "<script", // HTML5 spec, Mozilla | |
379 "<html", // HTML5 spec, Mozilla | |
380 "<head", // HTML5 spec, Mozilla | |
381 "<iframe", // Mozilla | |
382 "<h1", // Mozilla | |
383 "<div", // Mozilla | |
384 "<font", // Mozilla | |
385 "<table", // Mozilla | |
386 "<a", // Mozilla | |
387 "<style", // Mozilla | |
388 "<title", // Mozilla | |
389 "<b", // Mozilla | |
390 "<body", // Mozilla | |
391 "<br", "<p" // Mozilla | |
392 }; | |
393 return DoSignatureMatching( | |
394 data, length, html_signatures, arraysize(html_signatures)); | |
395 } | |
396 | |
397 bool SiteIsolationPolicy::SniffForXML(const char* data, size_t length) { | |
398 const char* xml_signatures[] = {"<?xml" // Mozilla | |
399 }; | |
400 return DoSignatureMatching( | |
401 data, length, xml_signatures, arraysize(xml_signatures)); | |
402 } | |
403 | |
404 bool SiteIsolationPolicy::SniffForJSON(const char* data, size_t length) { | |
405 // TODO(dsjang): We have to come up with a better way to sniff | |
406 // JSON. However, even RE cannot help us that much due to the fact | |
407 // that we don't do full parsing. This DFA starts with state 0, and | |
408 // finds 1) {, 2) "or', 3) : in the order. This is intentionally not | |
Charlie Reis
2013/08/09 18:48:38
finds {, "/', and : in that order.
dsjang
2013/08/12 22:56:17
Done.
| |
409 // using a regular expression library so that we can make the | |
410 // trusted code base as small as possible. State 4 is a dead state. | |
Charlie Reis
2013/08/09 18:48:38
I'd change this sentence to just say that we're av
dsjang
2013/08/12 22:56:17
Done.
| |
411 const int INIT_ST = 0; | |
nasko
2013/08/09 19:07:28
nit: I'd use less abbreviation in these, but it is
| |
412 const int LBRACE_ST = 1; | |
413 const int LQUOTE_ST = 2; | |
414 const int COLON_ST = 3; | |
415 const int DEAD_ST = 4; | |
416 | |
417 int state = INIT_ST; | |
418 for (size_t i = 0; i < length && state < COLON_ST; ++i, ++data) { | |
419 const char c = *data; | |
420 if (c == ' ' || c == '\t' || c == '\r' || c == '\n') | |
421 continue; | |
422 | |
423 switch (state) { | |
424 case INIT_ST: | |
425 if (c == '{') | |
426 state = LBRACE_ST; | |
427 else | |
428 state = DEAD_ST; | |
429 break; | |
430 case LBRACE_ST: | |
431 if (c == '\"' || c == '\'') | |
432 state = LQUOTE_ST; | |
433 else | |
434 state = DEAD_ST; | |
435 break; | |
436 case LQUOTE_ST: | |
437 if (c == ':') { | |
Charlie Reis
2013/08/09 18:48:38
nit: No braces needed.
dsjang
2013/08/12 22:56:17
Done.
| |
438 state = COLON_ST; | |
439 } | |
440 break; | |
441 default: | |
Charlie Reis
2013/08/09 18:48:38
NOTREACHED()
dsjang
2013/08/12 22:56:17
Done.
| |
442 break; | |
443 } | |
444 } | |
445 return state == COLON_ST; | |
446 } | |
447 | |
448 bool SiteIsolationPolicy::DoSignatureMatching(const char* data, | |
Charlie Reis
2013/08/09 18:48:38
MatchesSignature might be a better name.
| |
449 size_t length, | |
450 const char* signatures[], | |
451 size_t arr_size) { | |
452 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { | |
453 const char* signature = signatures[sig_index]; | |
454 size_t signature_length = strlen(signature); | |
455 size_t i = 0; | |
456 // Skip the white characters at the beginning of the document. | |
457 for (i = 0; i < length; ++i) { | |
458 char c = *data; | |
459 if (!(c == ' ' || c == '\r' || c == '\n' || c == '\t')) { | |
Charlie Reis
2013/08/09 18:48:38
nit: No braces needed on one-line clause.
dsjang
2013/08/12 22:56:17
Done.
| |
460 break; | |
461 } | |
462 ++data; | |
nasko
2013/08/09 19:07:28
Why are you modifying the input parameter? You als
dsjang
2013/08/12 22:56:17
Thanks a lot!! Done.
| |
463 } | |
464 length = length - i; | |
465 if (length < signature_length) | |
nasko
2013/08/09 19:07:28
Is there a reason why we can't put this before we
dsjang
2013/08/12 22:56:17
Done.
| |
466 continue; | |
467 if (!base::strncasecmp(signature, data, signature_length)) { | |
468 return true; | |
469 } | |
470 } | |
471 return false; | |
472 } | |
473 | |
474 bool SiteIsolationPolicy::IsErrorStatusCode(int status_code) { | |
Charlie Reis
2013/08/09 18:48:38
The only place you use this is when deciding if it
dsjang
2013/08/12 22:56:17
Done.
| |
475 // Chrome only uses the content of a response with one of these | |
476 // status codes for CSS/JavaScript. For images, Chrome just ignores | |
477 // status code. | |
478 const int renderable_status_code[] = {200, 201, 202, 203, 206, 300, 301, 302, | |
479 303, 305, 306, 307}; | |
480 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) { | |
481 if (renderable_status_code[i] == status_code) | |
482 return false; | |
483 } | |
484 return true; | |
485 } | |
486 | |
487 bool SiteIsolationPolicy::SniffForJS(const char* data, size_t length) { | |
488 // TODO(dsjang): This is a real hacking. The only purpose of this | |
Charlie Reis
2013/08/09 18:48:38
hacking -> hack
dsjang
2013/08/12 22:56:17
Done.
| |
489 // function is to try to see if there's any possibility that this | |
490 // data can be JavaScript.(superset of JS). This function will be | |
Charlie Reis
2013/08/09 18:48:38
nit: space, not period, after JavaScript
dsjang
2013/08/12 22:56:17
Done.
| |
491 // removed for the production code. | |
Charlie Reis
2013/08/09 18:48:38
will be removed once UMA stats are gathered.
dsjang
2013/08/12 22:56:17
Done.
| |
492 | |
493 // Search for "var " for JS detection. :-) | |
494 for (size_t i = 0; i < length - 3; ++i) { | |
495 if (strncmp(data, "var ", 4) == 0) { | |
Charlie Reis
2013/08/09 18:48:38
nit: No brace.
dsjang
2013/08/12 22:56:17
Done.
| |
496 return true; | |
497 } | |
498 ++data; | |
499 } | |
500 return false; | |
501 } | |
502 | |
503 } // namespace webkit_glue | |
OLD | NEW |