OLD | NEW |
| (Empty) |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "net/tools/dump_cache/url_to_filename_encoder.h" | |
6 | |
7 #include <string> | |
8 #include <vector> | |
9 | |
10 #include "base/strings/string_util.h" | |
11 #include "base/strings/stringprintf.h" | |
12 #include "base/strings/string_piece.h" | |
13 #include "testing/gtest/include/gtest/gtest.h" | |
14 | |
15 using base::StringPiece; | |
16 using std::string; | |
17 | |
18 namespace net { | |
19 | |
// Platform path separators used by the tests below.
//   kDirSeparator      - the separator the tests expect inside encoded
//                        filenames (and pass to Decode) on this platform.
//   kOtherDirSeparator - the opposite slash; ValidateUrl passes it to
//                        CheckValidChars as a character that must not appear
//                        in an encoded filename.
// Both are read-only, so they are declared const: this gives them internal
// linkage and prevents accidental modification by a test.
#ifdef WIN32
const char kDirSeparator = '\\';
const char kOtherDirSeparator = '/';
#else
const char kDirSeparator = '/';
const char kOtherDirSeparator = '\\';
#endif
27 | |
28 class UrlToFilenameEncoderTest : public ::testing::Test { | |
29 protected: | |
30 UrlToFilenameEncoderTest() : escape_(1, UrlToFilenameEncoder::kEscapeChar), | |
31 dir_sep_(1, kDirSeparator) { | |
32 } | |
33 | |
34 void CheckSegmentLength(const StringPiece& escaped_word) { | |
35 std::vector<StringPiece> components; | |
36 Tokenize(escaped_word, StringPiece("/"), &components); | |
37 for (size_t i = 0; i < components.size(); ++i) { | |
38 EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength, | |
39 components[i].size()); | |
40 } | |
41 } | |
42 | |
43 void CheckValidChars(const StringPiece& escaped_word, char invalid_slash) { | |
44 // These characters are invalid in Windows. We add in ', as that's pretty | |
45 // inconvenient in a Unix filename. | |
46 // | |
47 // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx | |
48 const string kInvalidChars = "<>:\"|?*'"; | |
49 for (size_t i = 0; i < escaped_word.size(); ++i) { | |
50 char c = escaped_word[i]; | |
51 EXPECT_EQ(string::npos, kInvalidChars.find(c)); | |
52 EXPECT_NE(invalid_slash, c); | |
53 EXPECT_NE('\0', c); // only invalid character in Posix | |
54 EXPECT_GT(0x7E, c); // only English printable characters | |
55 } | |
56 } | |
57 | |
58 void Validate(const string& in_word, const string& gold_word) { | |
59 string escaped_word, url; | |
60 UrlToFilenameEncoder::EncodeSegment( | |
61 std::string(), in_word, '/', &escaped_word); | |
62 EXPECT_EQ(gold_word, escaped_word); | |
63 CheckSegmentLength(escaped_word); | |
64 CheckValidChars(escaped_word, '\\'); | |
65 UrlToFilenameEncoder::Decode(escaped_word, '/', &url); | |
66 EXPECT_EQ(in_word, url); | |
67 } | |
68 | |
69 void ValidateAllSegmentsSmall(const string& in_word) { | |
70 string escaped_word, url; | |
71 UrlToFilenameEncoder::EncodeSegment( | |
72 std::string(), in_word, '/', &escaped_word); | |
73 CheckSegmentLength(escaped_word); | |
74 CheckValidChars(escaped_word, '\\'); | |
75 UrlToFilenameEncoder::Decode(escaped_word, '/', &url); | |
76 EXPECT_EQ(in_word, url); | |
77 } | |
78 | |
79 void ValidateNoChange(const string& word) { | |
80 // We always suffix the leaf with kEscapeChar, unless the leaf is empty. | |
81 Validate(word, word + escape_); | |
82 } | |
83 | |
84 void ValidateEscaped(unsigned char ch) { | |
85 // We always suffix the leaf with kEscapeChar, unless the leaf is empty. | |
86 char escaped[100]; | |
87 const char escape = UrlToFilenameEncoder::kEscapeChar; | |
88 base::snprintf(escaped, sizeof(escaped), "%c%02X%c", escape, ch, escape); | |
89 Validate(string(1, ch), escaped); | |
90 } | |
91 | |
92 void ValidateUrl(const string& url, const string& base_path, | |
93 bool legacy_escape, const string& gold_filename) { | |
94 string encoded_filename = UrlToFilenameEncoder::Encode( | |
95 url, base_path, legacy_escape); | |
96 EXPECT_EQ(gold_filename, encoded_filename); | |
97 if (!legacy_escape) { | |
98 CheckSegmentLength(encoded_filename); | |
99 CheckValidChars(encoded_filename, kOtherDirSeparator); | |
100 string decoded_url; | |
101 UrlToFilenameEncoder::Decode(encoded_filename, kDirSeparator, | |
102 &decoded_url); | |
103 if (url != decoded_url) { | |
104 EXPECT_EQ(url, "http://" + decoded_url); | |
105 } | |
106 } | |
107 } | |
108 | |
109 void ValidateUrlOldNew(const string& url, const string& gold_old_filename, | |
110 const string& gold_new_filename) { | |
111 ValidateUrl(url, std::string(), true, gold_old_filename); | |
112 ValidateUrl(url, std::string(), false, gold_new_filename); | |
113 } | |
114 | |
115 void ValidateEncodeSame(const string& url1, const string& url2) { | |
116 string filename1 = UrlToFilenameEncoder::Encode(url1, std::string(), false); | |
117 string filename2 = UrlToFilenameEncoder::Encode(url2, std::string(), false); | |
118 EXPECT_EQ(filename1, filename2); | |
119 } | |
120 | |
121 string escape_; | |
122 string dir_sep_; | |
123 }; | |
124 | |
125 TEST_F(UrlToFilenameEncoderTest, DoesNotEscape) { | |
126 ValidateNoChange(std::string()); | |
127 ValidateNoChange("abcdefg"); | |
128 ValidateNoChange("abcdefghijklmnopqrstuvwxyz"); | |
129 ValidateNoChange("ZYXWVUT"); | |
130 ValidateNoChange("ZYXWVUTSRQPONMLKJIHGFEDCBA"); | |
131 ValidateNoChange("01234567689"); | |
132 ValidateNoChange("_.=+-"); | |
133 ValidateNoChange("abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA" | |
134 "01234567689_.=+-"); | |
135 ValidateNoChange("index.html"); | |
136 ValidateNoChange("/"); | |
137 ValidateNoChange("/."); | |
138 ValidateNoChange("."); | |
139 ValidateNoChange(".."); | |
140 } | |
141 | |
142 TEST_F(UrlToFilenameEncoderTest, Escapes) { | |
143 const string bad_chars = | |
144 "<>:\"\\|?*" // Illegal on Windows | |
145 "~`!$^&(){}[]';" // Bad for Unix shells | |
146 "^@" // Build tool doesn't like | |
147 "#%" // Tool doesn't like | |
148 ","; // The escape char has to be escaped | |
149 | |
150 for (size_t i = 0; i < bad_chars.size(); ++i) { | |
151 ValidateEscaped(bad_chars[i]); | |
152 } | |
153 | |
154 // Check non-printable characters. | |
155 ValidateEscaped('\0'); | |
156 for (size_t i = 127; i < 256; ++i) { | |
157 ValidateEscaped(static_cast<char>(i)); | |
158 } | |
159 } | |
160 | |
161 TEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) { | |
162 Validate("mysite.com&x", "mysite.com" + escape_ + "26x" + escape_); | |
163 Validate("/./", "/" + escape_ + "./" + escape_); | |
164 Validate("/../", "/" + escape_ + "../" + escape_); | |
165 Validate("//", "/" + escape_ + "2F" + escape_); | |
166 Validate("/./leaf", "/" + escape_ + "./leaf" + escape_); | |
167 Validate("/../leaf", "/" + escape_ + "../leaf" + escape_); | |
168 Validate("//leaf", "/" + escape_ + "2Fleaf" + escape_); | |
169 Validate("mysite/u?param1=x¶m2=y", | |
170 "mysite/u" + escape_ + "3Fparam1=x" + escape_ + "26param2=y" + | |
171 escape_); | |
172 Validate("search?q=dogs&go=&form=QBLH&qs=n", // from Latency Labs bing test. | |
173 "search" + escape_ + "3Fq=dogs" + escape_ + "26go=" + escape_ + | |
174 "26form=QBLH" + escape_ + "26qs=n" + escape_); | |
175 Validate("~joebob/my_neeto-website+with_stuff.asp?id=138&content=true", | |
176 "" + escape_ + "7Ejoebob/my_neeto-website+with_stuff.asp" + escape_ + | |
177 "3Fid=138" + escape_ + "26content=true" + escape_); | |
178 } | |
179 | |
180 TEST_F(UrlToFilenameEncoderTest, EncodeUrlCorrectly) { | |
181 ValidateUrlOldNew("http://www.google.com/index.html", | |
182 "www.google.com" + dir_sep_ + "indexx2Ehtml", | |
183 "www.google.com" + dir_sep_ + "index.html" + escape_); | |
184 ValidateUrlOldNew("http://www.google.com/x/search?hl=en&q=dogs&oq=", | |
185 "www.google.com" + dir_sep_ + "x" + dir_sep_ + | |
186 "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D", | |
187 | |
188 "www.google.com" + dir_sep_ + "x" + dir_sep_ + "search" + | |
189 escape_ + "3Fhl=en" + escape_ + "26q=dogs" + escape_ + | |
190 "26oq=" + escape_); | |
191 ValidateUrlOldNew("http://www.foo.com/a//", | |
192 "www.foo.com" + dir_sep_ + "ax255Cx255Cindexx2Ehtml", | |
193 "www.foo.com" + dir_sep_ + "a" + dir_sep_ + escape_ + "2F" + | |
194 escape_); | |
195 | |
196 // From bug: Double slash preserved. | |
197 ValidateUrl("http://www.foo.com/u?site=http://www.google.com/index.html", | |
198 std::string(), | |
199 false, | |
200 "www.foo.com" + dir_sep_ + "u" + escape_ + "3Fsite=http" + | |
201 escape_ + "3A" + dir_sep_ + escape_ + "2Fwww.google.com" + | |
202 dir_sep_ + "index.html" + escape_); | |
203 ValidateUrlOldNew( | |
204 "http://blogutils.net/olct/online.php?" | |
205 "site=http://thelwordfanfics.blogspot.&interval=600", | |
206 | |
207 "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "onlinex2Ephpx3F" | |
208 "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D600", | |
209 | |
210 "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "online.php" + escape_ + | |
211 "3Fsite=http" + escape_ + "3A" + dir_sep_ + escape_ + | |
212 "2Fthelwordfanfics.blogspot." + escape_ + "26interval=600" + escape_); | |
213 } | |
214 | |
215 // From bug: Escapes treated the same as normal char. | |
216 TEST_F(UrlToFilenameEncoderTest, UnescapeUrlsBeforeEncode) { | |
217 for (int i = 0; i < 128; ++i) { | |
218 string unescaped(1, static_cast<char>(i)); | |
219 string escaped = base::StringPrintf("%%%02X", i); | |
220 ValidateEncodeSame(unescaped, escaped); | |
221 } | |
222 | |
223 ValidateEncodeSame( | |
224 "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot" | |
225 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch", | |
226 | |
227 "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot" | |
228 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch"); | |
229 } | |
230 | |
231 // From bug: Filename encoding is not prefix-free. | |
232 TEST_F(UrlToFilenameEncoderTest, EscapeSecondSlash) { | |
233 Validate("/", "/" + escape_); | |
234 Validate("//", "/" + escape_ + "2F" + escape_); | |
235 Validate("///", "/" + escape_ + "2F" + "/" + escape_); | |
236 } | |
237 | |
238 TEST_F(UrlToFilenameEncoderTest, LongTail) { | |
239 static char long_word[] = | |
240 "~joebob/briggs/12345678901234567890123456789012345678901234567890" | |
241 "1234567890123456789012345678901234567890123456789012345678901234567890" | |
242 "1234567890123456789012345678901234567890123456789012345678901234567890" | |
243 "1234567890123456789012345678901234567890123456789012345678901234567890" | |
244 "1234567890123456789012345678901234567890123456789012345678901234567890" | |
245 "1234567890123456789012345678901234567890123456789012345678901234567890"; | |
246 | |
247 // the long lines in the string below are 64 characters, so we can see | |
248 // the slashes every 128. | |
249 string gold_long_word = | |
250 escape_ + "7Ejoebob/briggs/" | |
251 "1234567890123456789012345678901234567890123456789012345678901234" | |
252 "56789012345678901234567890123456789012345678901234567890123456" + | |
253 escape_ + "-/" | |
254 "7890123456789012345678901234567890123456789012345678901234567890" | |
255 "12345678901234567890123456789012345678901234567890123456789012" + | |
256 escape_ + "-/" | |
257 "3456789012345678901234567890123456789012345678901234567890123456" | |
258 "78901234567890123456789012345678901234567890123456789012345678" + | |
259 escape_ + "-/" | |
260 "9012345678901234567890" + escape_; | |
261 EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, | |
262 sizeof(long_word)); | |
263 Validate(long_word, gold_long_word); | |
264 } | |
265 | |
266 TEST_F(UrlToFilenameEncoderTest, LongTailQuestion) { | |
267 // Here the '?' in the last path segment expands to @3F, making | |
268 // it hit 128 chars before the input segment gets that big. | |
269 static char long_word[] = | |
270 "~joebob/briggs/1234567?1234567?1234567?1234567?1234567?" | |
271 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" | |
272 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" | |
273 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" | |
274 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" | |
275 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"; | |
276 | |
277 // Notice that at the end of the third segment, we avoid splitting | |
278 // the (escape_ + "3F") that was generated from the "?", so that segment is | |
279 // only 127 characters. | |
280 string pattern = "1234567" + escape_ + "3F"; // 10 characters | |
281 string gold_long_word = | |
282 escape_ + "7Ejoebob/briggs/" + | |
283 pattern + pattern + pattern + pattern + pattern + pattern + "1234" | |
284 "567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + | |
285 "123456" + escape_ + "-/" | |
286 "7" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + | |
287 pattern + pattern + pattern + pattern + pattern + pattern + pattern + | |
288 "12" + | |
289 escape_ + "-/" | |
290 "34567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern | |
291 + "1234567" + escape_ + "3F" + pattern + pattern + pattern + pattern | |
292 + pattern + "1234567" + | |
293 escape_ + "-/" + | |
294 escape_ + "3F" + pattern + pattern + escape_; | |
295 EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, | |
296 sizeof(long_word)); | |
297 Validate(long_word, gold_long_word); | |
298 } | |
299 | |
300 TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenNoEscape) { | |
301 // hit corner cases, +/- 4 characters from kMaxLen | |
302 for (int i = -4; i <= 4; ++i) { | |
303 string input; | |
304 input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength, 'x'); | |
305 ValidateAllSegmentsSmall(input); | |
306 } | |
307 } | |
308 | |
309 TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenWithEscape) { | |
310 // hit corner cases, +/- 4 characters from kMaxLen. This time we | |
311 // leave off the last 'x' and put in a '.', which ensures that we | |
312 // are truncating with '/' *after* the expansion. | |
313 for (int i = -4; i <= 4; ++i) { | |
314 string input; | |
315 input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength - 1, 'x'); | |
316 input.append(1, '.'); // this will expand to 3 characters. | |
317 ValidateAllSegmentsSmall(input); | |
318 } | |
319 } | |
320 | |
321 TEST_F(UrlToFilenameEncoderTest, LeafBranchAlias) { | |
322 Validate("/a/b/c", "/a/b/c" + escape_); // c is leaf file "c," | |
323 Validate("/a/b/c/d", "/a/b/c/d" + escape_); // c is directory "c" | |
324 Validate("/a/b/c/d/", "/a/b/c/d/" + escape_); | |
325 } | |
326 | |
327 | |
328 TEST_F(UrlToFilenameEncoderTest, BackslashSeparator) { | |
329 string long_word; | |
330 string escaped_word; | |
331 long_word.append(UrlToFilenameEncoder::kMaximumSubdirectoryLength + 1, 'x'); | |
332 UrlToFilenameEncoder::EncodeSegment( | |
333 std::string(), long_word, '\\', &escaped_word); | |
334 | |
335 // check that one backslash, plus the escape ",-", and the ending , got added. | |
336 EXPECT_EQ(long_word.size() + 4, escaped_word.size()); | |
337 ASSERT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, | |
338 escaped_word.size()); | |
339 // Check that the backslash got inserted at the correct spot. | |
340 EXPECT_EQ('\\', escaped_word[ | |
341 UrlToFilenameEncoder::kMaximumSubdirectoryLength]); | |
342 } | |
343 | |
344 } // namespace net | |
345 | |
OLD | NEW |