| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef NET_HTTP_HTTP_UTIL_H_ | |
| 6 #define NET_HTTP_HTTP_UTIL_H_ | |
| 7 | |
| 8 #include <string> | |
| 9 #include <vector> | |
| 10 | |
| 11 #include "base/memory/ref_counted.h" | |
| 12 #include "base/strings/string_tokenizer.h" | |
| 13 #include "base/time/time.h" | |
| 14 #include "net/base/net_export.h" | |
| 15 #include "net/http/http_byte_range.h" | |
| 16 #include "net/http/http_version.h" | |
| 17 #include "url/gurl.h" | |
| 18 | |
| 19 // This is a macro to support extending this string literal at compile time. | |
| 20 // Please excuse me polluting your global namespace! | |
| 21 #define HTTP_LWS " \t" | |
| 22 | |
| 23 namespace net { | |
| 24 | |
| 25 class NET_EXPORT HttpUtil { | |
| 26 public: | |
| 27 // Returns the absolute path of the URL, to be used for the http request. | |
| 28 // The absolute path starts with a '/' and may contain a query. | |
| 29 static std::string PathForRequest(const GURL& url); | |
| 30 | |
| 31 // Returns the absolute URL, to be used for the http request. This url is | |
| 32 // made up of the protocol, host, [port], path, [query]. Everything else | |
| 33 // is stripped (username, password, reference). | |
| 34 static std::string SpecForRequest(const GURL& url); | |
| 35 | |
| 36 // Locates the next occurrence of delimiter in line, skipping over quoted | |
| 37 // strings (e.g., commas will not be treated as delimiters if they appear | |
| 38 // within a quoted string). Returns the offset of the found delimiter or | |
| 39 // line.size() if no delimiter was found. | |
| 40 static size_t FindDelimiter(const std::string& line, | |
| 41 size_t search_start, | |
| 42 char delimiter); | |
| 43 | |
| 44 // Parses the value of a Content-Type header. The resulting mime_type and | |
| 45 // charset values are normalized to lowercase. The mime_type and charset | |
| 46 // output values are only modified if the content_type_str contains a mime | |
| 47 // type and charset value, respectively. The boundary output value is | |
| 48 // optional and will be assigned the (quoted) value of the boundary | |
| 49 // parameter, if any. | |
| 50 static void ParseContentType(const std::string& content_type_str, | |
| 51 std::string* mime_type, | |
| 52 std::string* charset, | |
| 53 bool* had_charset, | |
| 54 std::string* boundary); | |
| 55 | |
| 56 // Scans |headers| and looks for the first "Range" header. If a "Range" | |
| 57 // header exists and the first one is well formatted, returns true and | |
| 58 // |ranges| will contain a list of valid ranges. If the return | |
| 59 // value is false then values in |ranges| should not be used. The format of | |
| 60 // the "Range" header is defined in RFC 7233 Section 2.1. | |
| 61 // https://tools.ietf.org/html/rfc7233#section-2.1 | |
| 62 static bool ParseRanges(const std::string& headers, | |
| 63 std::vector<HttpByteRange>* ranges); | |
| 64 | |
| 65 // Same thing as ParseRanges except the Range header is known and its value | |
| 66 // is directly passed in, rather than requiring searching through a string. | |
| 67 static bool ParseRangeHeader(const std::string& range_specifier, | |
| 68 std::vector<HttpByteRange>* ranges); | |
| 69 | |
| 70 // Parses a Retry-After header that is either an absolute date/time or a | |
| 71 // number of seconds in the future. Interprets absolute times as relative to | |
| 72 // |now|. If |retry_after_string| is successfully parsed and indicates a time | |
| 73 // that is not in the past, fills in |*retry_after| and returns true; | |
| 74 // otherwise, returns false. | |
| 75 static bool ParseRetryAfterHeader(const std::string& retry_after_string, | |
| 76 base::Time now, | |
| 77 base::TimeDelta* retry_after); | |
| 78 | |
| 79 // Scans the '\r\n'-delimited headers for the given header name. Returns | |
| 80 // true if a match is found. Input is assumed to be well-formed. | |
| 81 // TODO(darin): kill this | |
| 82 static bool HasHeader(const std::string& headers, const char* name); | |
| 83 | |
| 84 // Returns true if it is safe to allow users and scripts to specify the header | |
| 85 // named |name|. | |
| 86 static bool IsSafeHeader(const std::string& name); | |
| 87 | |
| 88 // Returns true if |name| is a valid HTTP header name. | |
| 89 static bool IsValidHeaderName(const std::string& name); | |
| 90 | |
| 91 // Returns false if |value| contains NUL or CRLF. This method does not perform | |
| 92 // a fully RFC-2616-compliant header value validation. | |
| 93 static bool IsValidHeaderValue(const std::string& value); | |
| 94 | |
| 95 // Strips all header lines from |headers| whose name matches | |
| 96 // |headers_to_remove|. |headers_to_remove| is a list of null-terminated | |
| 97 // lower-case header names, with array length |headers_to_remove_len|. | |
| 98 // Returns the stripped header lines list, separated by "\r\n". | |
| 99 static std::string StripHeaders(const std::string& headers, | |
| 100 const char* const headers_to_remove[], | |
| 101 size_t headers_to_remove_len); | |
| 102 | |
| 103 // Multiple occurrences of some headers cannot be coalesced into a comma- | |
| 104 // separated list since their values are (or contain) unquoted HTTP-date | |
| 105 // values, which may contain a comma (see RFC 2616 section 3.3.1). The std::string overload forwards |name|'s begin()/end() range to the iterator overload. | |
| 106 static bool IsNonCoalescingHeader(std::string::const_iterator name_begin, | |
| 107 std::string::const_iterator name_end); | |
| 108 static bool IsNonCoalescingHeader(const std::string& name) { | |
| 109 return IsNonCoalescingHeader(name.begin(), name.end()); | |
| 110 } | |
| 111 | |
| 112 // Return true if the character is HTTP "linear white space" (SP | HT). | |
| 113 // This definition corresponds with the HTTP_LWS macro, and does not match | |
| 114 // newlines. | |
| 115 static bool IsLWS(char c); | |
| 116 | |
| 117 // Trim HTTP_LWS chars from the beginning and end of the string. | |
| 118 static void TrimLWS(std::string::const_iterator* begin, | |
| 119 std::string::const_iterator* end); | |
| 120 | |
| 121 // Whether the character is the start of a quotation mark. | |
| 122 static bool IsQuote(char c); | |
| 123 | |
| 124 // Whether the string is a valid |token| as defined in RFC 2616 Sec 2.2. The std::string overload checks all of |str| by forwarding its begin()/end() range to the iterator overload. | |
| 125 static bool IsToken(std::string::const_iterator begin, | |
| 126 std::string::const_iterator end); | |
| 127 static bool IsToken(const std::string& str) { | |
| 128 return IsToken(str.begin(), str.end()); | |
| 129 } | |
| 130 | |
| 131 // RFC 2616 Sec 2.2: | |
| 132 // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) | |
| 133 // Unquote() strips the surrounding quotemarks off a string, and unescapes | |
| 134 // any quoted-pair to obtain the value contained by the quoted-string. | |
| 135 // If the input is not quoted, then it works like the identity function. | |
| 136 static std::string Unquote(std::string::const_iterator begin, | |
| 137 std::string::const_iterator end); | |
| 138 | |
| 139 // Same as above, but takes the whole string instead of an iterator range. | |
| 140 static std::string Unquote(const std::string& str); | |
| 141 | |
| 142 // The reverse of Unquote() -- escapes and surrounds with " | |
| 143 static std::string Quote(const std::string& str); | |
| 144 | |
| 145 // Returns the start of the status line, or -1 if no status line was found. | |
| 146 // This allows for 4 bytes of junk to precede the status line (which is what | |
| 147 // mozilla does too). | |
| 148 static int LocateStartOfStatusLine(const char* buf, int buf_len); | |
| 149 | |
| 150 // Returns index beyond the end-of-headers marker or -1 if not found. RFC | |
| 151 // 2616 defines the end-of-headers marker as a double CRLF; however, some | |
| 152 // servers only send back LFs (e.g., Unix-based CGI scripts written using the | |
| 153 // ASIS Apache module). This function therefore accepts the pattern LF[CR]LF | |
| 154 // as end-of-headers (just like Mozilla). | |
| 155 // The parameter |i| is the offset within |buf| to begin searching from. | |
| 156 static int LocateEndOfHeaders(const char* buf, int buf_len, int i = 0); | |
| 157 | |
| 158 // Assemble "raw headers" in the format required by HttpResponseHeaders. | |
| 159 // This involves normalizing line terminators, converting [CR]LF to \0 and | |
| 160 // handling HTTP line continuations (i.e., lines starting with LWS are | |
| 161 // continuations of the previous line). |buf_len| indicates the position of | |
| 162 // the end-of-headers marker as defined by LocateEndOfHeaders. | |
| 163 // If a \0 appears within the headers themselves, it will be stripped. This | |
| 164 // is a workaround to avoid later code from incorrectly interpreting it as | |
| 165 // a line terminator. | |
| 166 // | |
| 167 // TODO(eroman): we should use \n as the canonical line separator rather than | |
| 168 // \0 to avoid this problem. Unfortunately the persistence layer | |
| 169 // is already dependent on newlines being replaced by NULL so | |
| 170 // this is hard to change without breaking things. | |
| 171 static std::string AssembleRawHeaders(const char* buf, int buf_len); | |
| 172 | |
| 173 // Converts assembled "raw headers" back to the HTTP response format. That is, | |
| 174 // converts each \0 occurrence to CRLF. This is used by DevTools. | |
| 175 // Since all line continuations info is already lost at this point, the result | |
| 176 // consists of status line and then one line for each header. | |
| 177 static std::string ConvertHeadersBackToHTTPResponse(const std::string& str); | |
| 178 | |
| 179 // Given a comma separated ordered list of language codes, return | |
| 180 // the list with a qvalue appended to each language. | |
| 181 // The way qvalues are assigned is rather simple. The qvalue | |
| 182 // starts with 1.0 and is decremented by 0.2 for each successive entry | |
| 183 // in the list until it reaches 0.2. All the entries after that are | |
| 184 // assigned the same qvalue of 0.2. Also, note that the 1st language | |
| 185 // will not have a qvalue added because the absence of a qvalue implicitly | |
| 186 // means q=1.0. | |
| 187 // | |
| 188 // When making a http request, this should be used to determine what | |
| 189 // to put in Accept-Language header. If a comma separated list of language | |
| 190 // codes *without* qvalue is sent, web servers regard all | |
| 191 // of them as having q=1.0 and pick one of them even though it may not | |
| 192 // be at the beginning of the list (see http://crbug.com/5899). | |
| 193 static std::string GenerateAcceptLanguageHeader( | |
| 194 const std::string& raw_language_list); | |
| 195 | |
| 196 // Helper. If |*headers| already contains |header_name| do nothing, | |
| 197 // otherwise add <header_name> ": " <header_value> to the end of the list. | |
| 198 static void AppendHeaderIfMissing(const char* header_name, | |
| 199 const std::string& header_value, | |
| 200 std::string* headers); | |
| 201 | |
| 202 // Returns true if the parameters describe a response with a strong etag or | |
| 203 // last-modified header. See section 13.3.3 of RFC 2616. | |
| 204 static bool HasStrongValidators(HttpVersion version, | |
| 205 const std::string& etag_header, | |
| 206 const std::string& last_modified_header, | |
| 207 const std::string& date_header); | |
| 208 | |
| 209 // Gets a vector of common HTTP status codes for histograms of status | |
| 210 // codes. Currently returns everything in the range [100, 600), plus 0 | |
| 211 // (for invalid responses/status codes). | |
| 212 static std::vector<int> GetStatusCodesForHistogram(); | |
| 213 | |
| 214 // Maps an HTTP status code to one of the status codes in the vector | |
| 215 // returned by GetStatusCodesForHistogram. | |
| 216 static int MapStatusCodeForHistogram(int code); | |
| 217 | |
| 218 // Used to iterate over the name/value pairs of HTTP headers. To iterate | |
| 219 // over the values in a multi-value header, use ValuesIterator. | |
| 220 // See AssembleRawHeaders for joining line continuations (this iterator | |
| 221 // does not expect any). Reset() forwards to the line tokenizer's Reset(). | |
| 222 class NET_EXPORT HeadersIterator { | |
| 223 public: | |
| 224 HeadersIterator(std::string::const_iterator headers_begin, | |
| 225 std::string::const_iterator headers_end, | |
| 226 const std::string& line_delimiter); | |
| 227 ~HeadersIterator(); | |
| 228 | |
| 229 // Advances the iterator to the next header, if any. Returns true if there | |
| 230 // is a next header. Use name* and values* methods to access the resultant | |
| 231 // header name and values; name() and values() return std::string copies of the stored iterator ranges. | |
| 232 bool GetNext(); | |
| 233 | |
| 234 // Iterates through the list of headers, starting with the current position | |
| 235 // and looks for the specified header. Note that the name _must_ be | |
| 236 // lower cased. | |
| 237 // If the header was found, the return value will be true and the current | |
| 238 // position points to the header. If the return value is false, the | |
| 239 // current position will be at the end of the headers. | |
| 240 bool AdvanceTo(const char* lowercase_name); | |
| 241 | |
| 242 void Reset() { | |
| 243 lines_.Reset(); | |
| 244 } | |
| 245 | |
| 246 std::string::const_iterator name_begin() const { | |
| 247 return name_begin_; | |
| 248 } | |
| 249 std::string::const_iterator name_end() const { | |
| 250 return name_end_; | |
| 251 } | |
| 252 std::string name() const { | |
| 253 return std::string(name_begin_, name_end_); | |
| 254 } | |
| 255 | |
| 256 std::string::const_iterator values_begin() const { | |
| 257 return values_begin_; | |
| 258 } | |
| 259 std::string::const_iterator values_end() const { | |
| 260 return values_end_; | |
| 261 } | |
| 262 std::string values() const { | |
| 263 return std::string(values_begin_, values_end_); | |
| 264 } | |
| 265 | |
| 266 private: | |
| 267 base::StringTokenizer lines_; | |
| 268 std::string::const_iterator name_begin_; | |
| 269 std::string::const_iterator name_end_; | |
| 270 std::string::const_iterator values_begin_; | |
| 271 std::string::const_iterator values_end_; | |
| 272 }; | |
| 273 | |
| 274 // Iterates over delimited values in an HTTP header. HTTP LWS is | |
| 275 // automatically trimmed from the resulting values. | |
| 276 // | |
| 277 // When using this class to iterate over response header values, be aware that | |
| 278 // for some headers (e.g., Last-Modified), commas are not used as delimiters. | |
| 279 // This iterator should be avoided for headers like that which are considered | |
| 280 // non-coalescing (see IsNonCoalescingHeader). | |
| 281 // | |
| 282 // This iterator is careful to skip over delimiters found inside an HTTP | |
| 283 // quoted string. | |
| 284 // | |
| 285 class NET_EXPORT_PRIVATE ValuesIterator { | |
| 286 public: | |
| 287 ValuesIterator(std::string::const_iterator values_begin, | |
| 288 std::string::const_iterator values_end, | |
| 289 char delimiter); | |
| 290 ~ValuesIterator(); | |
| 291 | |
| 292 // Advances the iterator to the next value, if any. Returns true if there | |
| 293 // is a next value. Use value* methods to access the resultant value; value() returns a std::string copy of [value_begin(), value_end()). | |
| 294 bool GetNext(); | |
| 295 | |
| 296 std::string::const_iterator value_begin() const { | |
| 297 return value_begin_; | |
| 298 } | |
| 299 std::string::const_iterator value_end() const { | |
| 300 return value_end_; | |
| 301 } | |
| 302 std::string value() const { | |
| 303 return std::string(value_begin_, value_end_); | |
| 304 } | |
| 305 | |
| 306 private: | |
| 307 base::StringTokenizer values_; | |
| 308 std::string::const_iterator value_begin_; | |
| 309 std::string::const_iterator value_end_; | |
| 310 }; | |
| 311 | |
| 312 // Iterates over a delimited sequence of name-value pairs in an HTTP header. | |
| 313 // Each pair consists of a token (the name), an equals sign, and either a | |
| 314 // token or quoted-string (the value). Arbitrary HTTP LWS is permitted outside | |
| 315 // of and between names, values, and delimiters. | |
| 316 // | |
| 317 // String iterators returned from this class' methods may be invalidated upon | |
| 318 // calls to GetNext() or after the NameValuePairsIterator is destroyed. | |
| 319 class NET_EXPORT NameValuePairsIterator { | |
| 320 public: | |
| 321 NameValuePairsIterator(std::string::const_iterator begin, | |
| 322 std::string::const_iterator end, | |
| 323 char delimiter); | |
| 324 ~NameValuePairsIterator(); | |
| 325 | |
| 326 // Advances the iterator to the next pair, if any. Returns true if there | |
| 327 // is a next pair. Use name* and value* methods to access the resultant | |
| 328 // value. | |
| 329 bool GetNext(); | |
| 330 | |
| 331 // Returns false if there was a parse error. | |
| 332 bool valid() const { return valid_; } | |
| 333 | |
| 334 // The name of the current name-value pair. | |
| 335 std::string::const_iterator name_begin() const { return name_begin_; } | |
| 336 std::string::const_iterator name_end() const { return name_end_; } | |
| 337 std::string name() const { return std::string(name_begin_, name_end_); } | |
| 338 | |
| 339 // The value of the current name-value pair. When value_is_quoted_ is set, these accessors refer to unquoted_value_ instead of [value_begin_, value_end_). | |
| 340 std::string::const_iterator value_begin() const { | |
| 341 return value_is_quoted_ ? unquoted_value_.begin() : value_begin_; | |
| 342 } | |
| 343 std::string::const_iterator value_end() const { | |
| 344 return value_is_quoted_ ? unquoted_value_.end() : value_end_; | |
| 345 } | |
| 346 std::string value() const { | |
| 347 return value_is_quoted_ ? unquoted_value_ : std::string(value_begin_, | |
| 348 value_end_); | |
| 349 } | |
| 350 | |
| 351 // The value before unquoting (if any): always a copy of [value_begin_, value_end_), regardless of value_is_quoted_. | |
| 352 std::string raw_value() const { return std::string(value_begin_, | |
| 353 value_end_); } | |
| 354 | |
| 355 private: | |
| 356 HttpUtil::ValuesIterator props_; | |
| 357 bool valid_; | |
| 358 | |
| 359 std::string::const_iterator name_begin_; | |
| 360 std::string::const_iterator name_end_; | |
| 361 | |
| 362 std::string::const_iterator value_begin_; | |
| 363 std::string::const_iterator value_end_; | |
| 364 | |
| 365 // Do not store iterators into this string. The NameValuePairsIterator | |
| 366 // is copyable/assignable, and if copied the copy's iterators would point | |
| 367 // into the original's unquoted_value_ member. | |
| 368 std::string unquoted_value_; | |
| 369 | |
| 370 bool value_is_quoted_; | |
| 371 }; | |
| 372 }; | |
| 373 | |
| 374 } // namespace net | |
| 375 | |
| 376 #endif // NET_HTTP_HTTP_UTIL_H_ | |
| OLD | NEW |