| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "net/tools/flip_server/url_to_filename_encoder.h" | |
| 6 | |
| 7 #include <string> | |
| 8 #include <vector> | |
| 9 | |
| 10 #include "base/strings/string_piece.h" | |
| 11 #include "base/strings/string_split.h" | |
| 12 #include "base/strings/string_util.h" | |
| 13 #include "base/strings/stringprintf.h" | |
| 14 #include "testing/gtest/include/gtest/gtest.h" | |
| 15 | |
| 16 using base::StringPiece; | |
| 17 using std::string; | |
| 18 | |
| 19 namespace net { | |
| 20 | |
// Path separators used when building expected filenames in this test.
// kDirSeparator is the separator for the platform selected by WIN32;
// kOtherDirSeparator is the opposite slash. Both are compile-time constants,
// so they are declared const (which also gives them internal linkage).
#ifdef WIN32
const char kDirSeparator = '\\';
const char kOtherDirSeparator = '/';
#else
const char kDirSeparator = '/';
const char kOtherDirSeparator = '\\';
#endif
| 28 | |
| 29 class UrlToFilenameEncoderTest : public ::testing::Test { | |
| 30 protected: | |
| 31 UrlToFilenameEncoderTest() | |
| 32 : escape_(1, UrlToFilenameEncoder::kEscapeChar), | |
| 33 dir_sep_(1, kDirSeparator) {} | |
| 34 | |
| 35 void CheckSegmentLength(const StringPiece& escaped_word) { | |
| 36 for (const base::StringPiece& component : | |
| 37 base::SplitStringPiece(escaped_word, "/", base::KEEP_WHITESPACE, | |
| 38 base::SPLIT_WANT_NONEMPTY)) { | |
| 39 EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength, | |
| 40 component.size()); | |
| 41 } | |
| 42 } | |
| 43 | |
| 44 void CheckValidChars(const StringPiece& escaped_word, char invalid_slash) { | |
| 45 // These characters are invalid in Windows. We add in ', as that's pretty | |
| 46 // inconvenient in a Unix filename. | |
| 47 // | |
| 48 // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx | |
| 49 const string kInvalidChars = "<>:\"|?*'"; | |
| 50 for (size_t i = 0; i < escaped_word.size(); ++i) { | |
| 51 char c = escaped_word[i]; | |
| 52 EXPECT_EQ(string::npos, kInvalidChars.find(c)); | |
| 53 EXPECT_NE(invalid_slash, c); | |
| 54 EXPECT_NE('\0', c); // only invalid character in Posix | |
| 55 EXPECT_GT(0x7E, c); // only English printable characters | |
| 56 } | |
| 57 } | |
| 58 | |
| 59 void Validate(const string& in_word, const string& gold_word) { | |
| 60 string escaped_word, url; | |
| 61 UrlToFilenameEncoder::EncodeSegment(std::string(), in_word, '/', | |
| 62 &escaped_word); | |
| 63 EXPECT_EQ(gold_word, escaped_word); | |
| 64 CheckSegmentLength(escaped_word); | |
| 65 CheckValidChars(escaped_word, '\\'); | |
| 66 UrlToFilenameEncoder::Decode(escaped_word, '/', &url); | |
| 67 EXPECT_EQ(in_word, url); | |
| 68 } | |
| 69 | |
| 70 void ValidateAllSegmentsSmall(const string& in_word) { | |
| 71 string escaped_word, url; | |
| 72 UrlToFilenameEncoder::EncodeSegment(std::string(), in_word, '/', | |
| 73 &escaped_word); | |
| 74 CheckSegmentLength(escaped_word); | |
| 75 CheckValidChars(escaped_word, '\\'); | |
| 76 UrlToFilenameEncoder::Decode(escaped_word, '/', &url); | |
| 77 EXPECT_EQ(in_word, url); | |
| 78 } | |
| 79 | |
| 80 void ValidateNoChange(const string& word) { | |
| 81 // We always suffix the leaf with kEscapeChar, unless the leaf is empty. | |
| 82 Validate(word, word + escape_); | |
| 83 } | |
| 84 | |
| 85 void ValidateEscaped(unsigned char ch) { | |
| 86 // We always suffix the leaf with kEscapeChar, unless the leaf is empty. | |
| 87 char escaped[100]; | |
| 88 const char escape = UrlToFilenameEncoder::kEscapeChar; | |
| 89 base::snprintf(escaped, sizeof(escaped), "%c%02X%c", escape, ch, escape); | |
| 90 Validate(string(1, ch), escaped); | |
| 91 } | |
| 92 | |
| 93 void ValidateUrl(const string& url, | |
| 94 const string& base_path, | |
| 95 bool legacy_escape, | |
| 96 const string& gold_filename) { | |
| 97 string encoded_filename = | |
| 98 UrlToFilenameEncoder::Encode(url, base_path, legacy_escape); | |
| 99 EXPECT_EQ(gold_filename, encoded_filename); | |
| 100 if (!legacy_escape) { | |
| 101 CheckSegmentLength(encoded_filename); | |
| 102 CheckValidChars(encoded_filename, kOtherDirSeparator); | |
| 103 string decoded_url; | |
| 104 UrlToFilenameEncoder::Decode(encoded_filename, kDirSeparator, | |
| 105 &decoded_url); | |
| 106 if (url != decoded_url) { | |
| 107 EXPECT_EQ(url, "http://" + decoded_url); | |
| 108 } | |
| 109 } | |
| 110 } | |
| 111 | |
| 112 void ValidateUrlOldNew(const string& url, | |
| 113 const string& gold_old_filename, | |
| 114 const string& gold_new_filename) { | |
| 115 ValidateUrl(url, std::string(), true, gold_old_filename); | |
| 116 ValidateUrl(url, std::string(), false, gold_new_filename); | |
| 117 } | |
| 118 | |
| 119 void ValidateEncodeSame(const string& url1, const string& url2) { | |
| 120 string filename1 = UrlToFilenameEncoder::Encode(url1, std::string(), false); | |
| 121 string filename2 = UrlToFilenameEncoder::Encode(url2, std::string(), false); | |
| 122 EXPECT_EQ(filename1, filename2); | |
| 123 } | |
| 124 | |
| 125 string escape_; | |
| 126 string dir_sep_; | |
| 127 }; | |
| 128 | |
| 129 TEST_F(UrlToFilenameEncoderTest, DoesNotEscape) { | |
| 130 ValidateNoChange(std::string()); | |
| 131 ValidateNoChange("abcdefg"); | |
| 132 ValidateNoChange("abcdefghijklmnopqrstuvwxyz"); | |
| 133 ValidateNoChange("ZYXWVUT"); | |
| 134 ValidateNoChange("ZYXWVUTSRQPONMLKJIHGFEDCBA"); | |
| 135 ValidateNoChange("01234567689"); | |
| 136 ValidateNoChange("_.=+-"); | |
| 137 ValidateNoChange( | |
| 138 "abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA" | |
| 139 "01234567689_.=+-"); | |
| 140 ValidateNoChange("index.html"); | |
| 141 ValidateNoChange("/"); | |
| 142 ValidateNoChange("/."); | |
| 143 ValidateNoChange("."); | |
| 144 ValidateNoChange(".."); | |
| 145 } | |
| 146 | |
| 147 TEST_F(UrlToFilenameEncoderTest, Escapes) { | |
| 148 const string bad_chars = | |
| 149 "<>:\"\\|?*" // Illegal on Windows | |
| 150 "~`!$^&(){}[]';" // Bad for Unix shells | |
| 151 "^@" // Build tool doesn't like | |
| 152 "#%" // Tool doesn't like | |
| 153 ","; // The escape char has to be escaped | |
| 154 | |
| 155 for (size_t i = 0; i < bad_chars.size(); ++i) { | |
| 156 ValidateEscaped(bad_chars[i]); | |
| 157 } | |
| 158 | |
| 159 // Check non-printable characters. | |
| 160 ValidateEscaped('\0'); | |
| 161 for (size_t i = 127; i < 256; ++i) { | |
| 162 ValidateEscaped(static_cast<char>(i)); | |
| 163 } | |
| 164 } | |
| 165 | |
| 166 TEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) { | |
| 167 Validate("mysite.com&x", "mysite.com" + escape_ + "26x" + escape_); | |
| 168 Validate("/./", "/" + escape_ + "./" + escape_); | |
| 169 Validate("/../", "/" + escape_ + "../" + escape_); | |
| 170 Validate("//", "/" + escape_ + "2F" + escape_); | |
| 171 Validate("/./leaf", "/" + escape_ + "./leaf" + escape_); | |
| 172 Validate("/../leaf", "/" + escape_ + "../leaf" + escape_); | |
| 173 Validate("//leaf", "/" + escape_ + "2Fleaf" + escape_); | |
| 174 Validate("mysite/u?param1=x¶m2=y", "mysite/u" + escape_ + "3Fparam1=x" + | |
| 175 escape_ + "26param2=y" + escape_); | |
| 176 Validate("search?q=dogs&go=&form=QBLH&qs=n", // from Latency Labs bing test. | |
| 177 "search" + escape_ + "3Fq=dogs" + escape_ + "26go=" + escape_ + | |
| 178 "26form=QBLH" + escape_ + "26qs=n" + escape_); | |
| 179 Validate("~joebob/my_neeto-website+with_stuff.asp?id=138&content=true", | |
| 180 "" + escape_ + "7Ejoebob/my_neeto-website+with_stuff.asp" + escape_ + | |
| 181 "3Fid=138" + escape_ + "26content=true" + escape_); | |
| 182 } | |
| 183 | |
| 184 TEST_F(UrlToFilenameEncoderTest, EncodeUrlCorrectly) { | |
| 185 ValidateUrlOldNew("http://www.google.com/index.html", | |
| 186 "www.google.com" + dir_sep_ + "indexx2Ehtml", | |
| 187 "www.google.com" + dir_sep_ + "index.html" + escape_); | |
| 188 ValidateUrlOldNew("http://www.google.com/x/search?hl=en&q=dogs&oq=", | |
| 189 "www.google.com" + dir_sep_ + "x" + dir_sep_ + | |
| 190 "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D", | |
| 191 | |
| 192 "www.google.com" + dir_sep_ + "x" + dir_sep_ + "search" + | |
| 193 escape_ + "3Fhl=en" + escape_ + "26q=dogs" + escape_ + | |
| 194 "26oq=" + escape_); | |
| 195 ValidateUrlOldNew( | |
| 196 "http://www.foo.com/a//", | |
| 197 "www.foo.com" + dir_sep_ + "ax255Cx255Cindexx2Ehtml", | |
| 198 "www.foo.com" + dir_sep_ + "a" + dir_sep_ + escape_ + "2F" + escape_); | |
| 199 | |
| 200 // From bug: Double slash preserved. | |
| 201 ValidateUrl("http://www.foo.com/u?site=http://www.google.com/index.html", | |
| 202 std::string(), false, | |
| 203 "www.foo.com" + dir_sep_ + "u" + escape_ + "3Fsite=http" + | |
| 204 escape_ + "3A" + dir_sep_ + escape_ + "2Fwww.google.com" + | |
| 205 dir_sep_ + "index.html" + escape_); | |
| 206 ValidateUrlOldNew( | |
| 207 "http://blogutils.net/olct/online.php?" | |
| 208 "site=http://thelwordfanfics.blogspot.&interval=600", | |
| 209 | |
| 210 "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + | |
| 211 "onlinex2Ephpx3F" | |
| 212 "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D6" | |
| 213 "00", | |
| 214 | |
| 215 "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "online.php" + escape_ + | |
| 216 "3Fsite=http" + escape_ + "3A" + dir_sep_ + escape_ + | |
| 217 "2Fthelwordfanfics.blogspot." + escape_ + "26interval=600" + escape_); | |
| 218 } | |
| 219 | |
| 220 // From bug: Escapes treated the same as normal char. | |
| 221 TEST_F(UrlToFilenameEncoderTest, UnescapeUrlsBeforeEncode) { | |
| 222 for (int i = 0; i < 128; ++i) { | |
| 223 string unescaped(1, static_cast<char>(i)); | |
| 224 string escaped = base::StringPrintf("%%%02X", i); | |
| 225 ValidateEncodeSame(unescaped, escaped); | |
| 226 } | |
| 227 | |
| 228 ValidateEncodeSame( | |
| 229 "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot" | |
| 230 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch", | |
| 231 | |
| 232 "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot" | |
| 233 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch"); | |
| 234 } | |
| 235 | |
| 236 // From bug: Filename encoding is not prefix-free. | |
| 237 TEST_F(UrlToFilenameEncoderTest, EscapeSecondSlash) { | |
| 238 Validate("/", "/" + escape_); | |
| 239 Validate("//", "/" + escape_ + "2F" + escape_); | |
| 240 Validate("///", "/" + escape_ + "2F" + "/" + escape_); | |
| 241 } | |
| 242 | |
| 243 TEST_F(UrlToFilenameEncoderTest, LongTail) { | |
| 244 static char long_word[] = | |
| 245 "~joebob/briggs/12345678901234567890123456789012345678901234567890" | |
| 246 "1234567890123456789012345678901234567890123456789012345678901234567890" | |
| 247 "1234567890123456789012345678901234567890123456789012345678901234567890" | |
| 248 "1234567890123456789012345678901234567890123456789012345678901234567890" | |
| 249 "1234567890123456789012345678901234567890123456789012345678901234567890" | |
| 250 "1234567890123456789012345678901234567890123456789012345678901234567890"; | |
| 251 | |
| 252 // the long lines in the string below are 64 characters, so we can see | |
| 253 // the slashes every 128. | |
| 254 string gold_long_word = | |
| 255 escape_ + | |
| 256 "7Ejoebob/briggs/" | |
| 257 "1234567890123456789012345678901234567890123456789012345678901234" | |
| 258 "56789012345678901234567890123456789012345678901234567890123456" + | |
| 259 escape_ + | |
| 260 "-/" | |
| 261 "7890123456789012345678901234567890123456789012345678901234567890" | |
| 262 "12345678901234567890123456789012345678901234567890123456789012" + | |
| 263 escape_ + | |
| 264 "-/" | |
| 265 "3456789012345678901234567890123456789012345678901234567890123456" | |
| 266 "78901234567890123456789012345678901234567890123456789012345678" + | |
| 267 escape_ + | |
| 268 "-/" | |
| 269 "9012345678901234567890" + | |
| 270 escape_; | |
| 271 EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, | |
| 272 sizeof(long_word)); | |
| 273 Validate(long_word, gold_long_word); | |
| 274 } | |
| 275 | |
| 276 TEST_F(UrlToFilenameEncoderTest, LongTailQuestion) { | |
| 277 // Here the '?' in the last path segment expands to @3F, making | |
| 278 // it hit 128 chars before the input segment gets that big. | |
| 279 static char long_word[] = | |
| 280 "~joebob/briggs/1234567?1234567?1234567?1234567?1234567?" | |
| 281 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" | |
| 282 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" | |
| 283 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" | |
| 284 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" | |
| 285 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"; | |
| 286 | |
| 287 // Notice that at the end of the third segment, we avoid splitting | |
| 288 // the (escape_ + "3F") that was generated from the "?", so that segment is | |
| 289 // only 127 characters. | |
| 290 string pattern = "1234567" + escape_ + "3F"; // 10 characters | |
| 291 string gold_long_word = | |
| 292 escape_ + "7Ejoebob/briggs/" + pattern + pattern + pattern + pattern + | |
| 293 pattern + pattern + | |
| 294 "1234" | |
| 295 "567" + | |
| 296 escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + | |
| 297 "123456" + escape_ + | |
| 298 "-/" | |
| 299 "7" + | |
| 300 escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + | |
| 301 pattern + pattern + pattern + pattern + pattern + pattern + pattern + | |
| 302 "12" + escape_ + | |
| 303 "-/" | |
| 304 "34567" + | |
| 305 escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + | |
| 306 "1234567" + escape_ + "3F" + pattern + pattern + pattern + pattern + | |
| 307 pattern + "1234567" + escape_ + "-/" + escape_ + "3F" + pattern + | |
| 308 pattern + escape_; | |
| 309 EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, | |
| 310 sizeof(long_word)); | |
| 311 Validate(long_word, gold_long_word); | |
| 312 } | |
| 313 | |
| 314 TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenNoEscape) { | |
| 315 // hit corner cases, +/- 4 characters from kMaxLen | |
| 316 for (int i = -4; i <= 4; ++i) { | |
| 317 string input; | |
| 318 input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength, 'x'); | |
| 319 ValidateAllSegmentsSmall(input); | |
| 320 } | |
| 321 } | |
| 322 | |
| 323 TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenWithEscape) { | |
| 324 // hit corner cases, +/- 4 characters from kMaxLen. This time we | |
| 325 // leave off the last 'x' and put in a '.', which ensures that we | |
| 326 // are truncating with '/' *after* the expansion. | |
| 327 for (int i = -4; i <= 4; ++i) { | |
| 328 string input; | |
| 329 input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength - 1, 'x'); | |
| 330 input.append(1, '.'); // this will expand to 3 characters. | |
| 331 ValidateAllSegmentsSmall(input); | |
| 332 } | |
| 333 } | |
| 334 | |
| 335 TEST_F(UrlToFilenameEncoderTest, LeafBranchAlias) { | |
| 336 Validate("/a/b/c", "/a/b/c" + escape_); // c is leaf file "c," | |
| 337 Validate("/a/b/c/d", "/a/b/c/d" + escape_); // c is directory "c" | |
| 338 Validate("/a/b/c/d/", "/a/b/c/d/" + escape_); | |
| 339 } | |
| 340 | |
| 341 TEST_F(UrlToFilenameEncoderTest, BackslashSeparator) { | |
| 342 string long_word; | |
| 343 string escaped_word; | |
| 344 long_word.append(UrlToFilenameEncoder::kMaximumSubdirectoryLength + 1, 'x'); | |
| 345 UrlToFilenameEncoder::EncodeSegment(std::string(), long_word, '\\', | |
| 346 &escaped_word); | |
| 347 | |
| 348 // check that one backslash, plus the escape ",-", and the ending , got added. | |
| 349 EXPECT_EQ(long_word.size() + 4, escaped_word.size()); | |
| 350 ASSERT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, | |
| 351 escaped_word.size()); | |
| 352 // Check that the backslash got inserted at the correct spot. | |
| 353 EXPECT_EQ('\\', | |
| 354 escaped_word[UrlToFilenameEncoder::kMaximumSubdirectoryLength]); | |
| 355 } | |
| 356 | |
| 357 } // namespace net | |
| OLD | NEW |