Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(50)

Side by Side Diff: net/filter/sdch_policy_delegate.cc

Issue 2368433002: Add net::SdchSourceStream and net::SdchPolicyDelegate (Closed)
Patch Set: fix compile error Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/filter/sdch_policy_delegate.h"
6
7 #include "base/metrics/histogram_macros.h"
8 #include "base/strings/string_util.h"
9 #include "base/values.h"
10 #include "net/base/sdch_net_log_params.h"
11 #include "net/base/sdch_problem_codes.h"
12 #include "net/log/net_log.h"
13 #include "net/log/net_log_event_type.h"
14 #include "net/url_request/url_request_http_job.h"
15 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
16
17 namespace net {
18
19 namespace {
20 // Replacement body handed back when a meta-refresh is issued.
21 const char kRefreshHtml[] =
22 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
23 // MIME type prefix that marks a payload as HTML.
24 const char kTextHtml[] = "text/html";
25
26 } // namespace
27 // Captures the per-request state that the policy callbacks below consult.
28 SdchPolicyDelegate::SdchPolicyDelegate(
29 URLRequestHttpJob* job,
30 std::string mime_type,
31 const GURL& url,
32 bool is_cached_content,
33 SdchManager* sdch_manager,
34 std::unique_ptr<SdchManager::DictionarySet> dictionary_set,
35 int response_code,
36 const NetLogWithSource& net_log)
37 : job_(job),
38 mime_type_(std::move(mime_type)),  // By-value sink: move instead of copy.
39 url_(url),
40 is_cached_content_(is_cached_content),
41 sdch_manager_(sdch_manager),
42 dictionary_set_(std::move(dictionary_set)),
43 response_code_(response_code),
44 net_log_(net_log) {}
45 // No explicit teardown is performed here.
46 SdchPolicyDelegate::~SdchPolicyDelegate() = default;
47 // Adjusts |types| in place to compensate for Content-Encoding corruption.
48 // static
49 void SdchPolicyDelegate::FixUpSdchContentEncodings(
50 const NetLogWithSource& net_log,
51 const std::string& mime_type,
52 SdchManager::DictionarySet* dictionary_set,
53 std::vector<SourceStream::SourceType>* types) {
54 // If content encoding included SDCH, then everything is "relatively" fine.
55 if (!types->empty() && types->front() == SourceStream::TYPE_SDCH) {
Randy Smith (Not in Mondays) 2016/10/13 22:05:25 This looks to me like a functionality change? In
xunjieli 2016/10/14 18:23:34 Done. Sorry about that! Now it should match to Fix
56 // Some proxies (found currently in Argentina) strip the Content-Encoding
57 // text from "sdch,gzip" to a mere "sdch" without modifying the compressed
58 // payload. To handle this gracefully, we simulate the "probably" deleted
59 // ",gzip" by appending a tentative gzip decode, which will default to a
60 // no-op pass through filter if it doesn't get gzip headers where
61 // expected.
62 if (1 == types->size()) {
63 types->push_back(SourceStream::TYPE_GZIP_FALLBACK);
64 LogSdchProblem(net_log, SDCH_OPTIONAL_GUNZIP_ENCODING_ADDED);
65 }
66 return;
67 }
68
69 // If sdch dictionary is advertised, we might need to add some decoding, as
70 // some proxies strip encoding completely.
71 if (!dictionary_set)
72 return;
73
74 // There are now several cases to handle for an SDCH request. Foremost, if
75 // the outbound request was stripped so as not to advertise support for
76 // encodings, we might get back content with no encoding, or (for example)
77 // just gzip. We have to be sure that any changes we make allow for such
78 // minimal coding to work. That issue is why we use TENTATIVE filters if we
79 // add any, as those filters sniff the content, and act as pass-through
80 // filters if headers are not found.
Randy Smith (Not in Mondays) 2016/10/13 22:05:26 This is how they should work, but IIUC TENTATIVE f
xunjieli 2016/10/14 18:23:34 Done.
81
82 // If the outbound GET is not modified, then the server will generally try to
83 // send us SDCH encoded content. As that content returns, there are several
84 // corruptions of the header "content-encoding" that proxies may perform (and
85 // have been detected in the wild). We already dealt with an honest
86 // content encoding of "sdch,gzip" being corrupted into "sdch" with no change
Randy Smith (Not in Mondays) 2016/10/13 22:05:25 nit: "on" -> "no"
xunjieli 2016/10/14 18:23:34 Done.
87 // of the actual content. Another common corruption is to either discard
Randy Smith (Not in Mondays) 2016/10/13 22:05:25 nit: "discard"
xunjieli 2016/10/14 18:23:34 Done.
88 // the accurate content encoding, or to replace it with gzip only (again, with
89 // no change in actual content). The last observed corruption is to actually
90 // change the content, such as by re-gzipping it, and that may happen along
91 // with corruption of the stated content encoding (wow!).
92
93 // The one unresolved failure mode comes when we advertise a dictionary, and
94 // the server tries to *send* a gzipped file (not gzip encode content), and
95 // then we could do a gzip decode :-(. Since SDCH is only (currently)
96 // supported server side on paths that only send HTML content, this mode has
97 // never surfaced in the wild (and is unlikely to).
Randy Smith (Not in Mondays) 2016/10/13 22:05:25 Sadly, this last sentence is no longer true. I ha
xunjieli 2016/10/14 18:23:34 Done.
98 // We will gather a lot of stats as we perform the fixups.
99 if (base::StartsWith(mime_type, kTextHtml,
100 base::CompareCase::INSENSITIVE_ASCII)) {
101 // Suspicious case: Advertised dictionary, but server didn't use sdch, and
102 // we're HTML tagged.
103 if (types->empty()) {
104 LogSdchProblem(net_log, SDCH_ADDED_CONTENT_ENCODING);
105 } else if (1 == types->size()) {
106 LogSdchProblem(net_log, SDCH_FIXED_CONTENT_ENCODING);
107 } else {
108 LogSdchProblem(net_log, SDCH_FIXED_CONTENT_ENCODINGS);
109 }
110 } else {
111 // Remarkable case!?! We advertised an SDCH dictionary, content-encoding
112 // was not marked for SDCH processing: Why did the server suggest an SDCH
113 // dictionary in the first place?? Also, the content isn't
114 // tagged as HTML, despite the fact that SDCH encoding is most likely
115 // for HTML: Did some anti-virus system strip this tag (sometimes they
116 // strip accept-encoding headers on the request)?? Does the MIME
117 // type not start with "text/html" for some other reason?? We'll
118 // report this as a fixup to a binary file, but it probably really is
119 // text/html (somehow).
120 if (types->empty()) {
121 LogSdchProblem(net_log, SDCH_BINARY_ADDED_CONTENT_ENCODING);
122 } else if (1 == types->size()) {
123 LogSdchProblem(net_log, SDCH_BINARY_FIXED_CONTENT_ENCODING);
124 } else {
125 LogSdchProblem(net_log, SDCH_BINARY_FIXED_CONTENT_ENCODINGS);
126 }
127 }
128
129 // Leave the existing encoding type to be processed first, and add our
130 // tentative decodings to be done afterwards. Vodafone UK reportedly will
Randy Smith (Not in Mondays) 2016/10/13 22:05:25 nit: "reportedly"
xunjieli 2016/10/14 18:23:34 Done.
131 // perform a second layer of gzip encoding atop the server's sdch,gzip
132 // encoding, and then claim that the content encoding is a mere gzip. As a
133 // result we'll need (in that case) to do the gunzip, plus our tentative
134 // gunzip and tentative SDCH decoding. This approach nicely handles the
135 // empty() list as well, and should work with other (as yet undiscovered)
136 // proxies that choose to re-compress with some other encoding (such as
137 // bzip2, etc.).
138 types->insert(types->begin(), SourceStream::TYPE_GZIP_FALLBACK);
139 types->insert(types->begin(), SourceStream::TYPE_SDCH_POSSIBLE);
140 }
141
142 // Dictionary id errors are often the first indication that the SDCH stream has
143 // become corrupt. There are many possible causes: non-200 response codes, a
144 // cached non-SDCH-ified reply, or a response that claims to be SDCH but isn't
145 // actually. These are handled here by issuing a meta-refresh or swapping to the
146 // "passthrough" mode if appropriate, or failing the request if the error is
147 // unrecoverable.
148 SdchPolicyDelegate::ErrorRecovery SdchPolicyDelegate::OnDictionaryIdError(
149 bool possible_pass_through,
150 std::string* replace_output) {
151 if (possible_pass_through) {
152 LogCorruptionDetection(RESPONSE_TENTATIVE_SDCH);
153 return PASS_THROUGH;
Randy Smith (Not in Mondays) 2016/10/13 22:05:26 Sadly, I think this is a change from the original
xunjieli 2016/10/14 18:23:34 Done. Ah, you are right! I didn't realize that.
154 }
155 // HTTP 404 might be an unencoded error page, so if decoding failed, pass it
156 // through. TODO(xunjieli): Remove this. crbug.com/516773.
157 if (response_code_ == 404) {
158 LogSdchProblem(net_log_, SDCH_PASS_THROUGH_404_CODE);
159 LogCorruptionDetection(RESPONSE_404);
160 return PASS_THROUGH;
161 }
162
163 // HTTP !200 gets a meta-refresh for HTML.
164 // TODO(xunjieli): remove this. crbug.com/654393.
165 if (response_code_ != 200) {
166 LogCorruptionDetection(RESPONSE_NOT_200);
167 return IssueMetaRefreshIfPossible(replace_output);
168 }
169
170 // If this is a cached result and the source hasn't requested a dictionary, it
171 // probably never had a dictionary to begin with and is an unencoded response
172 // from earlier.
173 if (is_cached_content_) {
174 LogSdchProblem(net_log_, SDCH_PASS_THROUGH_OLD_CACHED);
175 LogCorruptionDetection(RESPONSE_OLD_UNENCODED);
176 return PASS_THROUGH;
177 }
178
179 // The original request didn't advertise any dictionaries, but the
180 // response claimed to be SDCH. There is no way to repair this situation: the
181 // original request already didn't advertise any dictionaries, and retrying it
182 // would likely have the same result. Blacklist the domain and try passing
183 // through.
184 if (!dictionary_set_) {
185 sdch_manager_->BlacklistDomain(url_, SDCH_PASSING_THROUGH_NON_SDCH);
186 LogCorruptionDetection(RESPONSE_ENCODING_LIE);
187 return PASS_THROUGH;
188 }
189 return IssueMetaRefreshIfPossible(replace_output);
Randy Smith (Not in Mondays) 2016/10/13 22:05:26 Hmmm. In the old code, no matter how we left the
xunjieli 2016/10/14 18:23:34 Done.
190 }
191
192 // Dictionary fails to load when we have a plausible dictionary id. There are
193 // many possible causes: a cached SDCH-ified reply for which the SdchManager did
194 // not have the dictionary or a corrupted response. These are handled here by
195 // issuing a meta-refresh, except for tentative SDCH and 404, which pass through.
196 SdchPolicyDelegate::ErrorRecovery SdchPolicyDelegate::OnGetDictionaryError(
197 bool possible_pass_through,
198 std::string* replace_output) {
199 if (possible_pass_through) {
200 LogCorruptionDetection(RESPONSE_TENTATIVE_SDCH);
201 return PASS_THROUGH;
202 }
203 // HTTP 404 might be an unencoded error page, so if decoding failed, pass it
204 // through. TODO(xunjieli): Remove this case. crbug.com/516773.
205 if (response_code_ == 404) {
206 LogSdchProblem(net_log_, SDCH_PASS_THROUGH_404_CODE);
207 LogCorruptionDetection(RESPONSE_404);
208 return PASS_THROUGH;
209 }
210 return IssueMetaRefreshIfPossible(replace_output);
Randy Smith (Not in Mondays) 2016/10/13 22:05:26 Much like the above, the lack of a LogCorruptionDe
xunjieli 2016/10/14 18:23:34 Done.
211 }
212 // Handles a mid-stream decode failure by deferring to the meta-refresh path.
213 SdchPolicyDelegate::ErrorRecovery SdchPolicyDelegate::OnDecodingError(
214 std::string* replace_output) {
215 // A decoding error, as opposed to a dictionary error, indicates a
216 // decompression failure partway through the payload of the SDCH stream,
217 // which means that the filter already witnessed a valid dictionary ID and
218 // successfully retrieved a dictionary for it. Decoding errors are not
219 // recoverable and it is not appropriate to stop decoding, so there are
220 // relatively few error cases here.
221 //
222 // In particular, a decoding error for an HTML payload is recoverable by
223 // issuing a meta-refresh, but to avoid having that happen too often, this
224 // class also temporarily blacklists the domain. A decoding error for a
225 // non-HTML payload is unrecoverable, so such an error gets a permanent
226 // blacklist entry. If the content was cached, no blacklisting is needed.
227 // All of that is done by the helper called below.
228 // TODO(xunjieli): This case should be removed. crbug.com/651821.
229 return IssueMetaRefreshIfPossible(replace_output);
230 }
230 // Looks up the dictionary text for |server_id|; true means |*text| is usable.
231 bool SdchPolicyDelegate::OnGetDictionary(const std::string& server_id,
232 const std::string** text) {
233 if (dictionary_set_) {
234 *text = dictionary_set_->GetDictionaryText(server_id);
235 if (*text) {
236 server_id_ = server_id;
237 return true;
238 }
239 }
240 // This is a hack. Naively, the dictionaries available for
241 // decoding should be only the ones advertised. However, there are
242 // cases, specifically resources encoded with old dictionaries living
243 // in the cache, that mean the full set of dictionaries should be made
244 // available for decoding. It's not known how often this happens;
245 // if it happens rarely enough, this code can be removed.
246 //
247 // TODO(rdsmith): Long-term, a better solution is necessary, since
248 // an entry in the cache being encoded with the dictionary doesn't
249 // guarantee that the dictionary is present. That solution probably
250 // involves storing unencoded resources in the cache, but might
251 // involve evicting encoded resources on dictionary removal.
252 // See http://crbug.com/383405.
253 SdchProblemCode rv = SDCH_OK;
254 unexpected_dictionary_set_ =
255 sdch_manager_->GetDictionarySetByHash(url_, server_id, &rv);
256 if (unexpected_dictionary_set_) {
257 *text = unexpected_dictionary_set_->GetDictionaryText(server_id);
258 LogSdchProblem(net_log_, is_cached_content_
259 ? SDCH_UNADVERTISED_DICTIONARY_USED_CACHED
260 : SDCH_UNADVERTISED_DICTIONARY_USED);
261 if (*text) {
262 server_id_ = server_id;
263 return true;
264 }
265 } else {
266 LogSdchProblem(net_log_, SDCH_DICTIONARY_HASH_NOT_FOUND);
267 LogCorruptionDetection(RESPONSE_NO_DICTIONARY);
Randy Smith (Not in Mondays) 2016/10/13 22:05:25 This seems like the wrong place to log a corruptio
xunjieli 2016/10/14 18:23:34 Done.
268 }
269 return false;
270 }
271 // Final bookkeeping for a stream: logs problems and records usage stats.
272 void SdchPolicyDelegate::OnStreamDestroyed(
Randy Smith (Not in Mondays) 2016/10/13 22:05:26 Why a separate function rather than in ~SdchPolicy
xunjieli 2016/10/14 18:23:34 I used a separate function to pass in |input_state
Randy Smith (Not in Mondays) 2016/10/19 19:49:41 Ah, good point; I should have thought of that.
xunjieli 2016/10/20 00:09:14 Done.
273 SdchSourceStream::InputState input_state,
274 const std::string& buffered_output,
275 open_vcdiff::VCDiffStreamingDecoder* decoder) {
276 if (decoder) {
277 if (!decoder->FinishDecoding()) {
278 LogSdchProblem(net_log_, SDCH_INCOMPLETE_SDCH_CONTENT);
279 // Make it possible for the user to hit reload, and get non-sdch content.
280 // Note this will "wear off" quickly enough, and is just meant to assure
281 // in some rare case that the user is not stuck.
282 sdch_manager_->BlacklistDomain(url_, SDCH_INCOMPLETE_SDCH_CONTENT);
283 }
284 }
285 // Filter chaining error, or premature teardown.
286 if (!buffered_output.empty())
287 LogSdchProblem(net_log_, SDCH_UNFLUSHED_CONTENT);
288
289 // FIXME(xunjieli): Why do we do an early return here?
Randy Smith (Not in Mondays) 2016/10/13 22:05:25 For the logging, maybe because we've already recor
xunjieli 2016/10/14 18:23:34 Done. Ah, that makes sense. I added a comment.
290 if (is_cached_content_) {
291 // Not a real error, but it is useful to have this tally.
292 // TODO(jar): Remove this stat after SDCH stability is validated.
293 LogSdchProblem(net_log_, SDCH_CACHE_DECODED);
294 return; // We don't need timing stats, and we already got ratios.
295 }
296 switch (input_state) {
297 case SdchSourceStream::STATE_DECODE: {
298 job_->RecordPacketStats(FilterContext::StatisticSelector::SDCH_DECODE);
299 // Allow latency experiments to proceed.
300 sdch_manager_->SetAllowLatencyExperiment(url_, true);
301
302 // Notify successful dictionary usage.
303 DCHECK(!server_id_.empty());
304 sdch_manager_->OnDictionaryUsed(server_id_);
305 return;
306 }
307 case SdchSourceStream::STATE_LOAD_DICTIONARY:
308 LogSdchProblem(net_log_, SDCH_PRIOR_TO_DICTIONARY);
309 return;
310 case SdchSourceStream::STATE_PASS_THROUGH:
311 job_->RecordPacketStats(
312 FilterContext::StatisticSelector::SDCH_PASSTHROUGH);
313 return;
314 case SdchSourceStream::STATE_OUTPUT_REPLACE:
315 // This is meta refresh case. Already accounted for when set.
316 return;
317 } // end of switch.
318 }
319 // Writes a meta-refresh page to |replace_output| for HTML; else returns NONE.
320 // TODO(xunjieli): Remove meta refresh. crbug.com/651821.
321 SdchPolicyDelegate::ErrorRecovery
322 SdchPolicyDelegate::IssueMetaRefreshIfPossible(std::string* replace_output) {
323 // Errors for non-HTML payloads are unrecoverable and get the domain
324 // blacklisted indefinitely.
325 if (mime_type_.find(kTextHtml) == std::string::npos) {
326 SdchProblemCode problem =
327 (is_cached_content_ ? SDCH_CACHED_META_REFRESH_UNSUPPORTED
328 : SDCH_META_REFRESH_UNSUPPORTED);
329 sdch_manager_->BlacklistDomainForever(url_, problem);
330 LogSdchProblem(net_log_, problem);
331 return NONE;
332 }
333
334 if (is_cached_content_) {
335 // Cached content is probably a startup tab, so just get the fresh content
336 // and try again, without disabling SDCH.
337 LogSdchProblem(net_log_, SDCH_META_REFRESH_CACHED_RECOVERY);
338 } else {
339 // Since it wasn't in the cache, blacklist for some period to get the
340 // correct content.
341 sdch_manager_->BlacklistDomain(url_, SDCH_META_REFRESH_RECOVERY);
342 LogSdchProblem(net_log_, SDCH_META_REFRESH_RECOVERY);
343 }
344
345 *replace_output = kRefreshHtml;  // Plain assignment; no strlen() needed.
346 return REPLACE_OUTPUT;
347 }
348 // static. Reports |problem| to SdchManager and adds it to |netlog|.
349 void SdchPolicyDelegate::LogSdchProblem(NetLogWithSource netlog,
Randy Smith (Not in Mondays) 2016/10/13 22:05:26 nit: // static
xunjieli 2016/10/14 18:23:34 Done.
350 SdchProblemCode problem) {
351 SdchManager::SdchErrorRecovery(problem);
352 netlog.AddEvent(NetLogEventType::SDCH_DECODING_ERROR,
353 base::Bind(&NetLogSdchResourceProblemCallback, problem));
354 }
355 // Records |cause| in a cached/uncached UMA histogram and in the net log.
356 void SdchPolicyDelegate::LogCorruptionDetection(
Randy Smith (Not in Mondays) 2016/10/13 22:05:25 Suggestion: It feels weird/inconsistent to have tw
xunjieli 2016/10/14 18:23:34 Done.
357 ResponseCorruptionDetectionCause cause) {
358 // Use if statement rather than ?: because UMA_HISTOGRAM_ENUMERATION
359 // caches the histogram name based on the call site.
360 if (is_cached_content_) {
361 UMA_HISTOGRAM_ENUMERATION("Sdch3.ResponseCorruptionDetection.Cached", cause,
362 RESPONSE_MAX);
363 } else {
364 UMA_HISTOGRAM_ENUMERATION("Sdch3.ResponseCorruptionDetection.Uncached",
365 cause, RESPONSE_MAX);
366 }
367 net_log_.AddEvent(NetLogEventType::SDCH_RESPONSE_CORRUPTION_DETECTION,
368 base::Bind(&NetLogResponseCorruptionDetectionCallback,
369 cause, is_cached_content_));
370 }
371
372 // static. Maps |cause| to a short label; "<unknown>" if no case matches.
373 const char* SdchPolicyDelegate::ResponseCorruptionDetectionCauseToString(
374 ResponseCorruptionDetectionCause cause) {
375 const char* cause_string = "<unknown>";
376 switch (cause) {
377 case RESPONSE_NONE:
378 cause_string = "NONE";
379 break;
380 case RESPONSE_404:
381 cause_string = "404";
382 break;
383 case RESPONSE_NOT_200:
384 cause_string = "NOT_200";
385 break;
386 case RESPONSE_OLD_UNENCODED:
387 cause_string = "OLD_UNENCODED";
388 break;
389 case RESPONSE_TENTATIVE_SDCH:
390 cause_string = "TENTATIVE_SDCH";
391 break;
392 case RESPONSE_NO_DICTIONARY:
393 cause_string = "NO_DICTIONARY";
394 break;
395 case RESPONSE_CORRUPT_SDCH:
396 cause_string = "CORRUPT_SDCH";
397 break;
398 case RESPONSE_ENCODING_LIE:
399 cause_string = "ENCODING_LIE";
400 break;
401 case RESPONSE_MAX:
402 cause_string = "<Error: max enum value>";
403 break;
404 }
405 return cause_string;
406 }
407
408 // static. Builds net-log params with a "cause" label and a "cached" flag.
409 std::unique_ptr<base::Value>
410 SdchPolicyDelegate::NetLogResponseCorruptionDetectionCallback(
411 ResponseCorruptionDetectionCause cause,
412 bool cached,
413 NetLogCaptureMode capture_mode) {
414 std::unique_ptr<base::DictionaryValue> dict(new base::DictionaryValue());
415 dict->SetString("cause", ResponseCorruptionDetectionCauseToString(cause));
416 dict->SetBoolean("cached", cached);
417 return std::move(dict);
418 }
419
420 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698