Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(272)

Side by Side Diff: net/tools/flip_server/url_to_filename_encoder.h

Issue 2169503002: Remove flip_server. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // URL filename encoder goals:
6 //
7 // 1. Allow URLs with arbitrary path-segment length, generating filenames
8 // with a maximum of 128 characters.
9 // 2. Provide somewhat human-readable filenames, for an easy debugging flow.
10 // 3. Provide reverse-mapping from filenames back to URLs.
11 // 4. Be able to distinguish http://x from http://x/ from http://x/index.html.
12 // Those can all be different URLs.
13 // 5. Be able to represent http://a/b/c and http://a/b/c/d, a pattern seen
14 // with Facebook Connect.
15 //
16 // We need an escape-character for representing characters that are legal
17 // in URL paths, but not in filenames, such as '?'.
18 //
19 // We can pick any legal character as an escape, as long as we escape it too.
20 // But as we have a goal of having filenames that humans can correlate with
21 // URLs, we should pick one that doesn't show up frequently in URLs. Candidates
22 // are ~`!@#$%^&()-=_+{}[],. but we would prefer to avoid characters that are
23 // shell escapes or that various build tools use.
24 //
25 // .#&%-=_+ occur frequently in URLs.
26 // <>:"/\|?* are illegal in Windows
27 // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx
28 // ~`!$^&(){}[]'; are special to Unix shells
29 // In addition, build tools do not like ^@#%
30 //
31 // Josh took a quick look at the frequency of some special characters in
32 // Sadeesh's slurped directory from Fall 09 and found the following occurrences:
33 //
34 // ^ 3 build tool doesn't like ^ in testdata filenames
35 // @ 10 build tool doesn't like @ in testdata filenames
36 // . 1676 too frequent in URLs
37 // , 76 THE WINNER
38 // # 0 build tool doesn't like it
39 // & 487 Prefer to avoid shell escapes
40 // % 374 g4 doesn't like it
41 // = 579 very frequent in URLs -- leave unmodified
42 // - 464 very frequent in URLs -- leave unmodified
43 // _ 798 very frequent in URLs -- leave unmodified
44 //
45 //
46 // The escaping algorithm is:
47 // 1) Escape all unfriendly symbols as ,XX where XX is the hex code.
48 // 2) Add a ',' at the end (We do not allow ',' at end of any directory name,
49 // so this assures that e.g. /a and /a/b can coexist in the filesystem).
50 // 3) Go through the path segment by segment (where a segment is one directory
51 // or leaf in the path) and
52 // 3a) If the segment is empty, escape the second slash. i.e. if it was
53 // www.foo.com//a then we escape the second / like www.foo.com/,2Fa,
54 // 3b) If it is "." or ".." prepend with ',' (so that we have a non-
55 // empty and non-reserved filename).
56 // 3c) If it is over 128 characters, break it up into smaller segments by
57 // inserting ,-/ (Windows limits paths to 128 chars, other OSes also
58 // have limits that would restrict us)
59 //
60 // For example:
61 // URL File
62 // / /,
63 // /index.html /index.html,
64 // /. /.,
65 // /a/b /a/b,
66 // /a/b/ /a/b/,
67 // /a/b/c /a/b/c, Note: no prefix problem
68 // /u?foo=bar /u,3Ffoo=bar,
69 // // /,2F,
70 // /./ /,./,
71 // /../ /,../,
72 // /, /,2C,
73 // /,./ /,2C./,
74 // /very...longname/ /very...long,-/name If very...long is about 126 long.
75
76 // NOTE: we avoid using some classes here (like FilePath and GURL) because we
77 // share this code with other projects externally.
78
79 #ifndef NET_TOOLS_FLIP_SERVER_URL_TO_FILENAME_ENCODER_H_
80 #define NET_TOOLS_FLIP_SERVER_URL_TO_FILENAME_ENCODER_H_
81
82 #include <stddef.h>
83
84 #include <string>
85
86 #include "base/strings/string_util.h"
87 #include "net/tools/flip_server/url_utilities.h"
88
89 namespace net {
90
91 // Helper class for converting a URL into a filename.
92 class UrlToFilenameEncoder {
93 public:
94 // Given a |url| and a |base_path|, returns a filename which represents this
95 // |url|. |url| may include URL escaping such as %21 for !
96 // |legacy_escape| indicates that this function should use the old-style
97 // of encoding.
98 // TODO(mbelshe): delete the legacy_escape code.
99 static std::string Encode(const std::string& url,
100 std::string base_path,
101 bool legacy_escape) {
102 std::string filename;
103 if (!legacy_escape) {
104 std::string url_no_scheme = UrlUtilities::GetUrlHostPath(url);
105 EncodeSegment(base_path, url_no_scheme, '/', &filename);
106 #ifdef WIN32
107 ReplaceAll(&filename, "/", "\\");
108 #endif
109 } else {
110 std::string clean_url(url);
111 if (clean_url.length() && clean_url.back() == '/')
112 clean_url.append("index.html");
113
114 std::string host = UrlUtilities::GetUrlHost(clean_url);
115 filename.append(base_path);
116 filename.append(host);
117 #ifdef WIN32
118 filename.append("\\");
119 #else
120 filename.append("/");
121 #endif
122
123 std::string url_filename = UrlUtilities::GetUrlPath(clean_url);
124 // Strip the leading '/'.
125 if (url_filename[0] == '/')
126 url_filename = url_filename.substr(1);
127
128 // Replace '/' with '\'.
129 ConvertToSlashes(&url_filename);
130
131 // Strip double back-slashes ("\\\\").
132 StripDoubleSlashes(&url_filename);
133
134 // Save path as filesystem-safe characters.
135 url_filename = LegacyEscape(url_filename);
136 filename.append(url_filename);
137
138 #ifndef WIN32
139 // Last step - convert to native slashes.
140 const std::string slash("/");
141 const std::string backslash("\\");
142 ReplaceAll(&filename, backslash, slash);
143 #endif
144 }
145
146 return filename;
147 }
148
149 // Rewrite HTML in a form that the SPDY in-memory server
150 // can read.
151 // |filename_prefix| is prepended without escaping.
152 // |escaped_ending| is the URL to be encoded into a filename. It may have URL
153 // escaped characters (like %21 for !).
154 // |dir_separator| is "/" on Unix, "\" on Windows.
155 // |encoded_filename| is the resultant filename.
156 static void EncodeSegment(const std::string& filename_prefix,
157 const std::string& escaped_ending,
158 char dir_separator,
159 std::string* encoded_filename);
160
161 // Decodes a filename that was encoded with EncodeSegment,
162 // yielding back the original URL.
163 static bool Decode(const std::string& encoded_filename,
164 char dir_separator,
165 std::string* decoded_url);
166
167 static const char kEscapeChar;
168 static const char kTruncationChar;
169 static const size_t kMaximumSubdirectoryLength;
170
171 friend class UrlToFilenameEncoderTest;
172
173 private:
174 // Appends a segment of the path, special-casing "." and "..", and
175 // ensuring that the segment does not exceed the path length. If it does,
176 // it chops the end off the segment, writes the segment with a separator of
177 // ",-/", and then rewrites segment to contain just the truncated piece so
178 // it can be used in the next iteration.
179 // |segment| is a read/write parameter containing segment to write
180 // Note: this should not be called with empty segment.
181 static void AppendSegment(std::string* segment, std::string* dest);
182
183 // Allow reading of old slurped files.
184 static std::string LegacyEscape(const std::string& path);
185
186 // Replace all instances of |from| within |str| as |to|.
187 static void ReplaceAll(std::string* str,
188 const std::string& from,
189 const std::string& to) {
190 std::string::size_type pos(0);
191 while ((pos = str->find(from, pos)) != std::string::npos) {
192 str->replace(pos, from.size(), to);
193 pos += from.size();
194 }
195 }
196
197 // Replace all instances of "/" with "\" in |path|.
198 static void ConvertToSlashes(std::string* path) {
199 const std::string slash("/");
200 const std::string backslash("\\");
201 ReplaceAll(path, slash, backslash);
202 }
203
204 // Replace all instances of "\\" with "%5C%5C" in |path|.
205 static void StripDoubleSlashes(std::string* path) {
206 const std::string doubleslash("\\\\");
207 const std::string escaped_doubleslash("%5C%5C");
208 ReplaceAll(path, doubleslash, escaped_doubleslash);
209 }
210 };
211
212 } // namespace net
213
214 #endif // NET_TOOLS_FLIP_SERVER_URL_TO_FILENAME_ENCODER_H_
OLDNEW
« no previous file with comments | « net/tools/flip_server/tcp_socket_util.cc ('k') | net/tools/flip_server/url_to_filename_encoder.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698