| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/filter/sdch_filter.h" | 5 #include "net/filter/sdch_filter.h" |
| 6 | 6 |
| 7 #include <ctype.h> | 7 #include <ctype.h> |
| 8 #include <limits.h> | 8 #include <limits.h> |
| 9 | 9 |
| 10 #include <algorithm> | 10 #include <algorithm> |
| 11 | 11 |
| 12 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/metrics/histogram.h" | 13 #include "base/metrics/histogram.h" |
| 14 #include "net/base/sdch_manager.h" | 14 #include "net/base/sdch_manager.h" |
| 15 #include "net/url_request/url_request_context.h" | 15 #include "net/url_request/url_request_context.h" |
| 16 | 16 |
| 17 #include "sdch/open-vcdiff/src/google/vcdecoder.h" | 17 #include "sdch/open-vcdiff/src/google/vcdecoder.h" |
| 18 | 18 |
| 19 namespace net { | 19 namespace net { |
| 20 | 20 |
| 21 namespace { |
| 22 |
| 23 // Distinguishes the kinds of response that trigger a meta-refresh, failure, |
| 24 // or fallback to pass-through. Logged to Sdch3.ResponseCorruptionDetection.*. |
| 25 enum ResponseCorruptionDetectionCause { |
| 26 RESPONSE_NONE, |
| 27 |
| 28 // The HTTP response code was 404. |
| 29 RESPONSE_404 = 1, |
| 30 |
| 31 // The HTTP response code was neither 404 nor 200. |
| 32 RESPONSE_NOT_200 = 2, |
| 33 |
| 34 // Cached content with no plausible dictionary hash (cached before dictionary retrieved). |
| 35 RESPONSE_OLD_UNENCODED = 3, |
| 36 |
| 37 // Speculative but incorrect SDCH filtering was added. |
| 38 RESPONSE_TENTATIVE_SDCH = 4, |
| 39 |
| 40 // The dictionary hash is plausible, but the dictionary needed for decoding is missing. |
| 41 RESPONSE_NO_DICTIONARY = 5, |
| 42 |
| 43 // An SDCH response was expected, but the content is not a valid SDCH response. |
| 44 RESPONSE_CORRUPT_SDCH = 6, |
| 45 |
| 46 // No dictionary was advertised with the request, the server claims |
| 47 // to have encoded with SDCH anyway, but it isn't an SDCH response. |
| 48 RESPONSE_ENCODING_LIE = 7, |
| 49 |
| 50 RESPONSE_MAX, |
| 51 }; |
| 52 |
| 53 } // namespace |
| 54 |
| 21 SdchFilter::SdchFilter(const FilterContext& filter_context) | 55 SdchFilter::SdchFilter(const FilterContext& filter_context) |
| 22 : filter_context_(filter_context), | 56 : filter_context_(filter_context), |
| 23 decoding_status_(DECODING_UNINITIALIZED), | 57 decoding_status_(DECODING_UNINITIALIZED), |
| 24 dictionary_hash_(), | 58 dictionary_hash_(), |
| 25 dictionary_hash_is_plausible_(false), | 59 dictionary_hash_is_plausible_(false), |
| 26 dictionary_(NULL), | 60 dictionary_(NULL), |
| 27 url_request_context_(filter_context.GetURLRequestContext()), | 61 url_request_context_(filter_context.GetURLRequestContext()), |
| 28 dest_buffer_excess_(), | 62 dest_buffer_excess_(), |
| 29 dest_buffer_excess_index_(0), | 63 dest_buffer_excess_index_(0), |
| 30 source_bytes_(0), | 64 source_bytes_(0), |
| (...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 164 DCHECK(dest_buffer_excess_.empty()); | 198 DCHECK(dest_buffer_excess_.empty()); |
| 165 // This is where we try very hard to do error recovery, and make this | 199 // This is where we try very hard to do error recovery, and make this |
| 166 // protocol robust in the face of proxies that do many different things. | 200 // protocol robust in the face of proxies that do many different things. |
| 167 // If we decide that things are looking very bad (too hard to recover), | 201 // If we decide that things are looking very bad (too hard to recover), |
| 168 // we may even issue a "meta-refresh" to reload the page without an SDCH | 202 // we may even issue a "meta-refresh" to reload the page without an SDCH |
| 169 // advertisement (so that we are sure we're not hurting anything). | 203 // advertisement (so that we are sure we're not hurting anything). |
| 170 // | 204 // |
| 171 // Watch out for an error page inserted by the proxy as part of a 40x | 205 // Watch out for an error page inserted by the proxy as part of a 40x |
| 172 // error response. When we see such content molestation, we certainly | 206 // error response. When we see such content molestation, we certainly |
| 173 // need to fall into the meta-refresh case. | 207 // need to fall into the meta-refresh case. |
| 208 ResponseCorruptionDetectionCause cause = RESPONSE_NONE; |
| 174 if (filter_context_.GetResponseCode() == 404) { | 209 if (filter_context_.GetResponseCode() == 404) { |
| 175 // We could be more generous, but for now, only a "NOT FOUND" code will | 210 // We could be more generous, but for now, only a "NOT FOUND" code will |
| 176 // cause a pass through. All other bad codes will fall into a | 211 // cause a pass through. All other bad codes will fall into a |
| 177 // meta-refresh. | 212 // meta-refresh. |
| 178 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE); | 213 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE); |
| 214 cause = RESPONSE_404; |
| 179 decoding_status_ = PASS_THROUGH; | 215 decoding_status_ = PASS_THROUGH; |
| 180 } else if (filter_context_.GetResponseCode() != 200) { | 216 } else if (filter_context_.GetResponseCode() != 200) { |
| 181 // We need to meta-refresh, with SDCH disabled. | 217 // We need to meta-refresh, with SDCH disabled. |
| 218 cause = RESPONSE_NOT_200; |
| 182 } else if (filter_context_.IsCachedContent() | 219 } else if (filter_context_.IsCachedContent() |
| 183 && !dictionary_hash_is_plausible_) { | 220 && !dictionary_hash_is_plausible_) { |
| 184 // We must have hit the back button, and gotten content that was fetched | 221 // We must have hit the back button, and gotten content that was fetched |
| 185 // before we *really* advertised SDCH and a dictionary. | 222 // before we *really* advertised SDCH and a dictionary. |
| 186 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED); | 223 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED); |
| 187 decoding_status_ = PASS_THROUGH; | 224 decoding_status_ = PASS_THROUGH; |
| 225 cause = RESPONSE_OLD_UNENCODED; |
| 188 } else if (possible_pass_through_) { | 226 } else if (possible_pass_through_) { |
| 189 // This is the potentially most graceful response. There really was no | 227 // This is the potentially most graceful response. There really was no |
| 190 // error. We were just overly cautious when we added a TENTATIVE_SDCH. | 228 // error. We were just overly cautious when we added a TENTATIVE_SDCH. |
| 191 // We added the sdch coding tag, and it should not have been added. | 229 // We added the sdch coding tag, and it should not have been added. |
| 192 // This can happen in server experiments, where the server decides | 230 // This can happen in server experiments, where the server decides |
| 193 // not to use sdch, even though there is a dictionary. To be | 231 // not to use sdch, even though there is a dictionary. To be |
| 194 // conservative, we locally added the tentative sdch (fearing that a | 232 // conservative, we locally added the tentative sdch (fearing that a |
| 195 // proxy stripped it!) and we must now recant (pass through). | 233 // proxy stripped it!) and we must now recant (pass through). |
| 196 SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH); | 234 // |
| 197 // However.... just to be sure we don't get burned by proxies that | 235 // However.... just to be sure we don't get burned by proxies that |
| 198 // re-compress with gzip or other system, we can sniff to see if this | 236 // re-compress with gzip or other system, we can sniff to see if this |
| 199 // is compressed data etc. For now, we do nothing, which gets us into | 237 // is compressed data etc. For now, we do nothing, which gets us into |
| 200 // the meta-refresh result. | 238 // the meta-refresh result. |
| 201 // TODO(jar): Improve robustness by sniffing for valid text that we can | 239 // TODO(jar): Improve robustness by sniffing for valid text that we can |
| 202 // actually use re: decoding_status_ = PASS_THROUGH; | 240 // actually use re: decoding_status_ = PASS_THROUGH; |
| 241 cause = RESPONSE_TENTATIVE_SDCH; |
| 203 } else if (dictionary_hash_is_plausible_) { | 242 } else if (dictionary_hash_is_plausible_) { |
| 204 // We need a meta-refresh since we don't have the dictionary. | 243 // We need a meta-refresh since we don't have the dictionary. |
| 205 // The common cause is a restart of the browser, where we try to render | 244 // The common cause is a restart of the browser, where we try to render |
| 206 // cached content that was saved when we had a dictionary. | 245 // cached content that was saved when we had a dictionary. |
| 207 } else if (filter_context_.IsSdchResponse()) { | 246 cause = RESPONSE_NO_DICTIONARY; |
| 247 } else if (filter_context_.SdchResponseExpected()) { |
| 208 // This is a very corrupt SDCH request response. We can't decode it. | 248 // This is a very corrupt SDCH request response. We can't decode it. |
| 209 // We'll use a meta-refresh, and get content without asking for SDCH. | 249 // We'll use a meta-refresh, and get content without asking for SDCH. |
| 210 // This will also progressively disable SDCH for this domain. | 250 // This will also progressively disable SDCH for this domain. |
| 251 cause = RESPONSE_CORRUPT_SDCH; |
| 211 } else { | 252 } else { |
| 212 // One of the first 9 bytes precluded consideration as a hash. | 253 // One of the first 9 bytes precluded consideration as a hash. |
| 213 // This can't be an SDCH payload, even though the server said it was. | 254 // This can't be an SDCH payload, even though the server said it was. |
| 214 // This is a major error, as the server or proxy tagged this SDCH even | 255 // This is a major error, as the server or proxy tagged this SDCH even |
| 215 // though it is not! | 256 // though it is not! |
| 216 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!! | 257 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!! |
| 217 // Worse yet, meta-refresh could lead to an infinite refresh loop. | 258 // Worse yet, meta-refresh could lead to an infinite refresh loop. |
| 218 SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH); | 259 SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH); |
| 219 decoding_status_ = PASS_THROUGH; | 260 decoding_status_ = PASS_THROUGH; |
| 220 // ... but further back-off on advertising SDCH support. | 261 // ... but further back-off on advertising SDCH support. |
| 221 url_request_context_->sdch_manager()->BlacklistDomain( | 262 url_request_context_->sdch_manager()->BlacklistDomain( |
| 222 url_, SdchManager::PASSING_THROUGH_NON_SDCH); | 263 url_, SdchManager::PASSING_THROUGH_NON_SDCH); |
| 264 cause = RESPONSE_ENCODING_LIE; |
| 265 } |
| 266 DCHECK_NE(RESPONSE_NONE, cause); |
| 267 |
| 268 // Use if statement rather than ?: because UMA_HISTOGRAM_ENUMERATION |
| 269 // caches the histogram name based on the call site. |
| 270 if (filter_context_.IsCachedContent()) { |
| 271 UMA_HISTOGRAM_ENUMERATION( |
| 272 "Sdch3.ResponseCorruptionDetection.Cached", cause, RESPONSE_MAX); |
| 273 } else { |
| 274 UMA_HISTOGRAM_ENUMERATION( |
| 275 "Sdch3.ResponseCorruptionDetection.Uncached", cause, RESPONSE_MAX); |
| 223 } | 276 } |
| 224 | 277 |
| 225 if (decoding_status_ == PASS_THROUGH) { | 278 if (decoding_status_ == PASS_THROUGH) { |
| 226 dest_buffer_excess_ = dictionary_hash_; // Send what we scanned. | 279 dest_buffer_excess_ = dictionary_hash_; // Send what we scanned. |
| 227 } else { | 280 } else { |
| 228 // This is where we try to do the expensive meta-refresh. | 281 // This is where we try to do the expensive meta-refresh. |
| 229 if (std::string::npos == mime_type_.find("text/html")) { | 282 if (std::string::npos == mime_type_.find("text/html")) { |
| 230 // Since we can't do a meta-refresh (along with an exponential | 283 // Since we can't do a meta-refresh (along with an exponential |
| 231 // backoff), we'll just make sure this NEVER happens again. | 284 // backoff), we'll just make sure this NEVER happens again. |
| 232 SdchManager::ProblemCodes problem = | 285 SdchManager::ProblemCodes problem = |
| (...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 387 dest_buffer_excess_index_ += amount; | 440 dest_buffer_excess_index_ += amount; |
| 388 if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) { | 441 if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) { |
| 389 DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_); | 442 DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_); |
| 390 dest_buffer_excess_.clear(); | 443 dest_buffer_excess_.clear(); |
| 391 dest_buffer_excess_index_ = 0; | 444 dest_buffer_excess_index_ = 0; |
| 392 } | 445 } |
| 393 return amount; | 446 return amount; |
| 394 } | 447 } |
| 395 | 448 |
| 396 } // namespace net | 449 } // namespace net |
| OLD | NEW |