| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // URL filename encoder goals: | |
| 6 // | |
| 7 // 1. Allow URLs with arbitrary path-segment length, generating filenames | |
| 8 // with a maximum of 128 characters. | |
| 9 // 2. Provide somewhat human-readable filenames, for an easy debugging flow. | |
| 10 // 3. Provide reverse-mapping from filenames back to URLs. | |
| 11 // 4. Be able to distinguish http://x from http://x/ from http://x/index.html. | |
| 12 // Those can all be different URLs. | |
| 13 // 5. Be able to represent http://a/b/c and http://a/b/c/d, a pattern seen | |
| 14 // with Facebook Connect. | |
| 15 // | |
| 16 // We need an escape-character for representing characters that are legal | |
| 17 // in URL paths, but not in filenames, such as '?'. | |
| 18 // | |
| 19 // We can pick any legal character as an escape, as long as we escape it too. | |
| 20 // But as we have a goal of having filenames that humans can correlate with | |
| 21 // URLs, we should pick one that doesn't show up frequently in URLs. Candidates | |
| 22 // are ~`!@#$%^&()-=_+{}[],. but we would prefer to avoid characters that are | |
| 23 // shell escapes or that various build tools use. | |
| 24 // | |
| 25 // .#&%-=_+ occur frequently in URLs. | |
| 26 // <>:"/\|?* are illegal in Windows | |
| 27 // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx | |
| 28 // ~`!$^&(){}[]'; are special to Unix shells | |
| 29 // In addition, build tools do not like ^@#% | |
| 30 // | |
| 31 // Josh took a quick look at the frequency of some special characters in | |
| 32 // Sadeesh's slurped directory from Fall 09 and found the following occurrences: | |
| 33 // | |
| 34 // ^ 3 build tool doesn't like ^ in testdata filenames | |
| 35 // @ 10 build tool doesn't like @ in testdata filenames | |
| 36 // . 1676 too frequent in URLs | |
| 37 // , 76 THE WINNER | |
| 38 // # 0 build tool doesn't like it | |
| 39 // & 487 Prefer to avoid shell escapes | |
| 40 // % 374 g4 doesn't like it | |
| 41 // = 579 very frequent in URLs -- leave unmodified | |
| 42 // - 464 very frequent in URLs -- leave unmodified | |
| 43 // _ 798 very frequent in URLs -- leave unmodified | |
| 44 // | |
| 45 // | |
| 46 // The escaping algorithm is: | |
| 47 // 1) Escape all unfriendly symbols as ,XX where XX is the hex code. | |
| 48 // 2) Add a ',' at the end (We do not allow ',' at end of any directory name, | |
| 49 // so this assures that e.g. /a and /a/b can coexist in the filesystem). | |
| 50 // 3) Go through the path segment by segment (where a segment is one directory | |
| 51 // or leaf in the path) and | |
| 52 // 3a) If the segment is empty, escape the second slash. i.e. if it was | |
| 53 // www.foo.com//a then we escape the second / like www.foo.com/,2Fa, | |
| 54 // 3b) If it is "." or ".." prepend with ',' (so that we have a non- | |
| 55 // empty and non-reserved filename). | |
| 56 // 3c) If it is over 128 characters, break it up into smaller segments by | |
| 57 // inserting ,-/ (Windows limits paths to 128 chars, other OSes also | |
| 58 // have limits that would restrict us) | |
| 59 // | |
| 60 // For example: | |
| 61 // URL File | |
| 62 // / /, | |
| 63 // /index.html /index.html, | |
| 64 // /. /., | |
| 65 // /a/b /a/b, | |
| 66 // /a/b/ /a/b/, | |
| 67 // /a/b/c /a/b/c, Note: no prefix problem | |
| 68 // /u?foo=bar /u,3Ffoo=bar, | |
| 69 // // /,2F, | |
| 70 // /./ /,./, | |
| 71 // /../ /,../, | |
| 72 // /, /,2C, | |
| 73 // /,./ /,2C./, | |
| 74 // /very...longname/ /very...long,-/name If very...long is about 126 long. | |
| 75 | |
| 76 // NOTE: we avoid using some classes here (like FilePath and GURL) because we | |
| 77 // share this code with other projects externally. | |
| 78 | |
| 79 #ifndef NET_TOOLS_FLIP_SERVER_URL_TO_FILENAME_ENCODER_H_ | |
| 80 #define NET_TOOLS_FLIP_SERVER_URL_TO_FILENAME_ENCODER_H_ | |
| 81 | |
| 82 #include <stddef.h> | |
| 83 | |
| 84 #include <string> | |
| 85 | |
| 86 #include "base/strings/string_util.h" | |
| 87 #include "net/tools/flip_server/url_utilities.h" | |
| 88 | |
| 89 namespace net { | |
| 90 | |
| 91 // Helper class for converting a URL into a filename. | |
| 92 class UrlToFilenameEncoder { | |
| 93 public: | |
| 94 // Given a |url| and a |base_path|, returns a filename which represents this | |
| 95 // |url|. |url| may include URL escaping such as %21 for ! | |
| 96 // |legacy_escape| indicates that this function should use the old-style | |
| 97 // of encoding. | |
| 98 // TODO(mbelshe): delete the legacy_escape code. | |
| 99 static std::string Encode(const std::string& url, | |
| 100 std::string base_path, | |
| 101 bool legacy_escape) { | |
| 102 std::string filename; | |
| 103 if (!legacy_escape) { | |
| 104 std::string url_no_scheme = UrlUtilities::GetUrlHostPath(url); | |
| 105 EncodeSegment(base_path, url_no_scheme, '/', &filename); | |
| 106 #ifdef WIN32 | |
| 107 ReplaceAll(&filename, "/", "\\"); | |
| 108 #endif | |
| 109 } else { | |
| 110 std::string clean_url(url); | |
| 111 if (clean_url.length() && clean_url.back() == '/') | |
| 112 clean_url.append("index.html"); | |
| 113 | |
| 114 std::string host = UrlUtilities::GetUrlHost(clean_url); | |
| 115 filename.append(base_path); | |
| 116 filename.append(host); | |
| 117 #ifdef WIN32 | |
| 118 filename.append("\\"); | |
| 119 #else | |
| 120 filename.append("/"); | |
| 121 #endif | |
| 122 | |
| 123 std::string url_filename = UrlUtilities::GetUrlPath(clean_url); | |
| 124 // Strip the leading '/'. | |
| 125 if (url_filename[0] == '/') | |
| 126 url_filename = url_filename.substr(1); | |
| 127 | |
| 128 // Replace '/' with '\'. | |
| 129 ConvertToSlashes(&url_filename); | |
| 130 | |
| 131 // Strip double back-slashes ("\\\\"). | |
| 132 StripDoubleSlashes(&url_filename); | |
| 133 | |
| 134 // Save path as filesystem-safe characters. | |
| 135 url_filename = LegacyEscape(url_filename); | |
| 136 filename.append(url_filename); | |
| 137 | |
| 138 #ifndef WIN32 | |
| 139 // Last step - convert to native slashes. | |
| 140 const std::string slash("/"); | |
| 141 const std::string backslash("\\"); | |
| 142 ReplaceAll(&filename, backslash, slash); | |
| 143 #endif | |
| 144 } | |
| 145 | |
| 146 return filename; | |
| 147 } | |
| 148 | |
| 149 // Encodes a URL into a filename that the SPDY in-memory server | |
| 150 // can read. | |
| 151 // |filename_prefix| is prepended without escaping. | |
| 152 // |escaped_ending| is the URL to be encoded into a filename. It may have URL | |
| 153 // escaped characters (like %21 for !). | |
| 154 // |dir_separator| is "/" on Unix, "\" on Windows. | |
| 155 // |encoded_filename| is the resultant filename. | |
| 156 static void EncodeSegment(const std::string& filename_prefix, | |
| 157 const std::string& escaped_ending, | |
| 158 char dir_separator, | |
| 159 std::string* encoded_filename); | |
| 160 | |
| 161 // Decodes a filename that was encoded with EncodeSegment, | |
| 162 // yielding back the original URL. | |
| 163 static bool Decode(const std::string& encoded_filename, | |
| 164 char dir_separator, | |
| 165 std::string* decoded_url); | |
| 166 | |
| 167 static const char kEscapeChar; | |
| 168 static const char kTruncationChar; | |
| 169 static const size_t kMaximumSubdirectoryLength; | |
| 170 | |
| 171 friend class UrlToFilenameEncoderTest; | |
| 172 | |
| 173 private: | |
| 174 // Appends a segment of the path, special-casing "." and "..", and | |
| 175 // ensuring that the segment does not exceed the path length. If it does, | |
| 176 // it chops the end off the segment, writes the segment with a separator of | |
| 177 // ",-/", and then rewrites segment to contain just the truncated piece so | |
| 178 // it can be used in the next iteration. | |
| 179 // |segment| is a read/write parameter containing segment to write | |
| 180 // Note: this should not be called with empty segment. | |
| 181 static void AppendSegment(std::string* segment, std::string* dest); | |
| 182 | |
| 183 // Allow reading of old slurped files. | |
| 184 static std::string LegacyEscape(const std::string& path); | |
| 185 | |
// Replaces every non-overlapping occurrence of |from| within |str| with |to|,
// scanning left to right.
//
// Text inserted by a replacement is never rescanned, so |to| may contain
// |from| without causing repeated expansion. An empty |from| matches nothing
// and leaves |str| untouched.
//
// |str| must be non-null; it is modified in place.
static void ReplaceAll(std::string* str,
                       const std::string& from,
                       const std::string& to) {
  // std::string::find("") succeeds at every position, which would make the
  // loop below insert |to| forever.
  if (from.empty())
    return;

  std::string::size_type pos = 0;
  while ((pos = str->find(from, pos)) != std::string::npos) {
    str->replace(pos, from.size(), to);
    // Resume right after the inserted text. Stepping by from.size() instead
    // would skip matches when |to| is shorter than |from| and would rescan
    // the replacement when |to| is longer.
    pos += to.size();
  }
}
| 196 | |
| 197 // Replace all instances of "/" with "\" in |path|. | |
| 198 static void ConvertToSlashes(std::string* path) { | |
| 199 const std::string slash("/"); | |
| 200 const std::string backslash("\\"); | |
| 201 ReplaceAll(path, slash, backslash); | |
| 202 } | |
| 203 | |
| 204 // Replace all instances of "\\" with "%5C%5C" in |path|. | |
| 205 static void StripDoubleSlashes(std::string* path) { | |
| 206 const std::string doubleslash("\\\\"); | |
| 207 const std::string escaped_doubleslash("%5C%5C"); | |
| 208 ReplaceAll(path, doubleslash, escaped_doubleslash); | |
| 209 } | |
| 210 }; | |
| 211 | |
| 212 } // namespace net | |
| 213 | |
| 214 #endif // NET_TOOLS_FLIP_SERVER_URL_TO_FILENAME_ENCODER_H_ | |
| OLD | NEW |