Index: net/tools/flip_server/url_to_filename_encoder.cc |
diff --git a/net/tools/flip_server/url_to_filename_encoder.cc b/net/tools/flip_server/url_to_filename_encoder.cc |
deleted file mode 100644 |
index b5a01d18c37dcb87c71b5fb4f856b6b4c0b42230..0000000000000000000000000000000000000000 |
--- a/net/tools/flip_server/url_to_filename_encoder.cc |
+++ /dev/null |
@@ -1,275 +0,0 @@ |
-// Copyright (c) 2011 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#include "net/tools/flip_server/url_to_filename_encoder.h" |
- |
-#include <stdlib.h> |
- |
-#include "base/logging.h" |
-#include "base/strings/string_util.h" |
- |
-using std::string; |
- |
-namespace { |
- |
-#ifdef WIN32 |
-#define strtoull _strtoui64 |
-#endif |
- |
-// A simple parser for long long values. Returns the parsed value if a |
-// valid integer is found; else returns deflt |
-// UInt64 and Int64 cannot handle decimal numbers with leading 0s. |
-uint64_t ParseLeadingHex64Value(const char* str, uint64_t deflt) { |
- char* error = NULL; |
- const uint64_t value = strtoull(str, &error, 16); |
- return (error == str) ? deflt : value; |
-} |
- |
-} // namespace |
- |
-namespace net { |
- |
-// The escape character choice is made here -- all code and tests in this |
-// directory are based off of this constant. However, our testdata |
-// has tons of dependencies on this, so it cannot be changed without |
-// re-running those tests and fixing them. |
-const char UrlToFilenameEncoder::kEscapeChar = ','; |
-const char UrlToFilenameEncoder::kTruncationChar = '-'; |
-const size_t UrlToFilenameEncoder::kMaximumSubdirectoryLength = 128; |
- |
-void UrlToFilenameEncoder::AppendSegment(string* segment, string* dest) { |
- CHECK(!segment->empty()); |
- if ((*segment == ".") || (*segment == "..")) { |
- dest->append(1, kEscapeChar); |
- dest->append(*segment); |
- segment->clear(); |
- } else { |
- size_t segment_size = segment->size(); |
- if (segment_size > kMaximumSubdirectoryLength) { |
- // We need to inject ",-" at the end of the segment to signify that |
- // we are inserting an artificial '/'. This means we have to chop |
- // off at least two characters to make room. |
- segment_size = kMaximumSubdirectoryLength - 2; |
- |
- // But we don't want to break up an escape sequence that happens to lie at |
- // the end. Escape sequences are at most 2 characters. |
- if ((*segment)[segment_size - 1] == kEscapeChar) { |
- segment_size -= 1; |
- } else if ((*segment)[segment_size - 2] == kEscapeChar) { |
- segment_size -= 2; |
- } |
- dest->append(segment->data(), segment_size); |
- dest->append(1, kEscapeChar); |
- dest->append(1, kTruncationChar); |
- segment->erase(0, segment_size); |
- |
- // At this point, if we had segment_size=3, and segment="abcd", |
- // then after this erase, we will have written "abc,-" and set segment="d" |
- } else { |
- dest->append(*segment); |
- segment->clear(); |
- } |
- } |
-} |
- |
-void UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix, |
- const string& escaped_ending, |
- char dir_separator, |
- string* encoded_filename) { |
- string filename_ending = UrlUtilities::Unescape(escaped_ending); |
- |
- char encoded[3]; |
- int encoded_len; |
- string segment; |
- |
- // TODO(jmarantz): This code would be a bit simpler if we disallowed |
- // Instaweb allowing filename_prefix to not end in "/". We could |
- // then change the is routine to just take one input string. |
- size_t start_of_segment = filename_prefix.find_last_of(dir_separator); |
- if (start_of_segment == string::npos) { |
- segment = filename_prefix; |
- } else { |
- segment = filename_prefix.substr(start_of_segment + 1); |
- *encoded_filename = filename_prefix.substr(0, start_of_segment + 1); |
- } |
- |
- size_t index = 0; |
- // Special case the first / to avoid adding a leading kEscapeChar. |
- if (!filename_ending.empty() && (filename_ending[0] == dir_separator)) { |
- encoded_filename->append(segment); |
- segment.clear(); |
- encoded_filename->append(1, dir_separator); |
- ++index; |
- } |
- |
- for (; index < filename_ending.length(); ++index) { |
- unsigned char ch = static_cast<unsigned char>(filename_ending[index]); |
- |
- // Note: instead of outputing an empty segment, we let the second slash |
- // be escaped below. |
- if ((ch == dir_separator) && !segment.empty()) { |
- AppendSegment(&segment, encoded_filename); |
- encoded_filename->append(1, dir_separator); |
- segment.clear(); |
- } else { |
- // After removing unsafe chars the only safe ones are _.=+- and alphanums. |
- if ((ch == '_') || (ch == '.') || (ch == '=') || (ch == '+') || |
- (ch == '-') || (('0' <= ch) && (ch <= '9')) || |
- (('A' <= ch) && (ch <= 'Z')) || (('a' <= ch) && (ch <= 'z'))) { |
- encoded[0] = ch; |
- encoded_len = 1; |
- } else { |
- encoded[0] = kEscapeChar; |
- encoded[1] = ch / 16; |
- encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0'; |
- encoded[2] = ch % 16; |
- encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0'; |
- encoded_len = 3; |
- } |
- segment.append(encoded, encoded_len); |
- |
- // If segment is too big, we must chop it into chunks. |
- if (segment.size() > kMaximumSubdirectoryLength) { |
- AppendSegment(&segment, encoded_filename); |
- encoded_filename->append(1, dir_separator); |
- } |
- } |
- } |
- |
- // Append "," to the leaf filename so the leaf can also be a branch., e.g. |
- // allow http://a/b/c and http://a/b/c/d to co-exist as files "/a/b/c," and |
- // /a/b/c/d". So we will rename the "d" here to "d,". If doing that pushed |
- // us over the 128 char limit, then we will need to append "/" and the |
- // remaining chars. |
- segment += kEscapeChar; |
- AppendSegment(&segment, encoded_filename); |
- if (!segment.empty()) { |
- // The last overflow segment is special, because we appended in |
- // kEscapeChar above. We won't need to check it again for size |
- // or further escaping. |
- encoded_filename->append(1, dir_separator); |
- encoded_filename->append(segment); |
- } |
-} |
- |
-// Note: this decoder is not the exact inverse of the EncodeSegment above, |
-// because it does not take into account a prefix. |
-bool UrlToFilenameEncoder::Decode(const string& encoded_filename, |
- char dir_separator, |
- string* decoded_url) { |
- enum State { kStart, kEscape, kFirstDigit, kTruncate, kEscapeDot }; |
- State state = kStart; |
- char hex_buffer[3]; |
- hex_buffer[2] = '\0'; |
- for (size_t i = 0; i < encoded_filename.size(); ++i) { |
- char ch = encoded_filename[i]; |
- switch (state) { |
- case kStart: |
- if (ch == kEscapeChar) { |
- state = kEscape; |
- } else if (ch == dir_separator) { |
- decoded_url->append(1, '/'); // URLs only use '/' not '\\' |
- } else { |
- decoded_url->append(1, ch); |
- } |
- break; |
- case kEscape: |
- if (base::IsHexDigit(ch)) { |
- hex_buffer[0] = ch; |
- state = kFirstDigit; |
- } else if (ch == kTruncationChar) { |
- state = kTruncate; |
- } else if (ch == '.') { |
- decoded_url->append(1, '.'); |
- state = kEscapeDot; // Look for at most one more dot. |
- } else if (ch == dir_separator) { |
- // Consider url "//x". This was once encoded to "/,/x,". |
- // This code is what skips the first Escape. |
- decoded_url->append(1, '/'); // URLs only use '/' not '\\' |
- state = kStart; |
- } else { |
- return false; |
- } |
- break; |
- case kFirstDigit: |
- if (base::IsHexDigit(ch)) { |
- hex_buffer[1] = ch; |
- uint64_t hex_value = ParseLeadingHex64Value(hex_buffer, 0); |
- decoded_url->append(1, static_cast<char>(hex_value)); |
- state = kStart; |
- } else { |
- return false; |
- } |
- break; |
- case kTruncate: |
- if (ch == dir_separator) { |
- // Skip this separator, it was only put in to break up long |
- // path segments, but is not part of the URL. |
- state = kStart; |
- } else { |
- return false; |
- } |
- break; |
- case kEscapeDot: |
- decoded_url->append(1, ch); |
- state = kStart; |
- break; |
- } |
- } |
- |
- // All legal encoded filenames end in kEscapeChar. |
- return (state == kEscape); |
-} |
- |
-// Escape the given input |path| and chop any individual components |
-// of the path which are greater than kMaximumSubdirectoryLength characters |
-// into two chunks. |
-// |
-// This legacy version has several issues with aliasing of different URLs, |
-// inability to represent both /a/b/c and /a/b/c/d, and inability to decode |
-// the filenames back into URLs. |
-// |
-// But there is a large body of slurped data which depends on this format, |
-// so leave it as the default for spdy_in_mem_edsm_server. |
-string UrlToFilenameEncoder::LegacyEscape(const string& path) { |
- string output; |
- |
- // Note: We also chop paths into medium sized 'chunks'. |
- // This is due to the incompetence of the windows |
- // filesystem, which still hasn't figured out how |
- // to deal with long filenames. |
- int last_slash = 0; |
- for (size_t index = 0; index < path.length(); index++) { |
- char ch = path[index]; |
- if (ch == 0x5C) |
- last_slash = index; |
- if ((ch == 0x2D) || // hyphen |
- (ch == 0x5C) || (ch == 0x5F) || // backslash, underscore |
- ((0x30 <= ch) && (ch <= 0x39)) || // Digits [0-9] |
- ((0x41 <= ch) && (ch <= 0x5A)) || // Uppercase [A-Z] |
- ((0x61 <= ch) && (ch <= 0x7A))) { // Lowercase [a-z] |
- output.append(&path[index], 1); |
- } else { |
- char encoded[3]; |
- encoded[0] = 'x'; |
- encoded[1] = ch / 16; |
- encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0'; |
- encoded[2] = ch % 16; |
- encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0'; |
- output.append(encoded, 3); |
- } |
- if (index - last_slash > kMaximumSubdirectoryLength) { |
-#ifdef WIN32 |
- char slash = '\\'; |
-#else |
- char slash = '/'; |
-#endif |
- output.append(&slash, 1); |
- last_slash = index; |
- } |
- } |
- return output; |
-} |
- |
-} // namespace net |