| Index: net/tools/flip_server/url_to_filename_encoder.cc
|
| diff --git a/net/tools/flip_server/url_to_filename_encoder.cc b/net/tools/flip_server/url_to_filename_encoder.cc
|
| deleted file mode 100644
|
| index b5a01d18c37dcb87c71b5fb4f856b6b4c0b42230..0000000000000000000000000000000000000000
|
| --- a/net/tools/flip_server/url_to_filename_encoder.cc
|
| +++ /dev/null
|
| @@ -1,275 +0,0 @@
|
| -// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include "net/tools/flip_server/url_to_filename_encoder.h"
|
| -
|
| -#include <stdlib.h>
|
| -
|
| -#include "base/logging.h"
|
| -#include "base/strings/string_util.h"
|
| -
|
| -using std::string;
|
| -
|
| -namespace {
|
| -
|
| -#ifdef WIN32
|
| -#define strtoull _strtoui64
|
| -#endif
|
| -
|
| -// A simple parser for long long values. Returns the parsed value if a
|
| -// valid integer is found; else returns deflt
|
| -// UInt64 and Int64 cannot handle decimal numbers with leading 0s.
|
| -uint64_t ParseLeadingHex64Value(const char* str, uint64_t deflt) {
|
| - char* error = NULL;
|
| - const uint64_t value = strtoull(str, &error, 16);
|
| - return (error == str) ? deflt : value;
|
| -}
|
| -
|
| -} // namespace
|
| -
|
| -namespace net {
|
| -
|
| -// The escape character choice is made here -- all code and tests in this
|
| -// directory are based off of this constant. However, our testdata
|
| -// has tons of dependencies on this, so it cannot be changed without
|
| -// re-running those tests and fixing them.
|
| -const char UrlToFilenameEncoder::kEscapeChar = ',';
|
| -const char UrlToFilenameEncoder::kTruncationChar = '-';
|
| -const size_t UrlToFilenameEncoder::kMaximumSubdirectoryLength = 128;
|
| -
|
| -void UrlToFilenameEncoder::AppendSegment(string* segment, string* dest) {
|
| - CHECK(!segment->empty());
|
| - if ((*segment == ".") || (*segment == "..")) {
|
| - dest->append(1, kEscapeChar);
|
| - dest->append(*segment);
|
| - segment->clear();
|
| - } else {
|
| - size_t segment_size = segment->size();
|
| - if (segment_size > kMaximumSubdirectoryLength) {
|
| - // We need to inject ",-" at the end of the segment to signify that
|
| - // we are inserting an artificial '/'. This means we have to chop
|
| - // off at least two characters to make room.
|
| - segment_size = kMaximumSubdirectoryLength - 2;
|
| -
|
| - // But we don't want to break up an escape sequence that happens to lie at
|
| - // the end. Escape sequences are at most 2 characters.
|
| - if ((*segment)[segment_size - 1] == kEscapeChar) {
|
| - segment_size -= 1;
|
| - } else if ((*segment)[segment_size - 2] == kEscapeChar) {
|
| - segment_size -= 2;
|
| - }
|
| - dest->append(segment->data(), segment_size);
|
| - dest->append(1, kEscapeChar);
|
| - dest->append(1, kTruncationChar);
|
| - segment->erase(0, segment_size);
|
| -
|
| - // At this point, if we had segment_size=3, and segment="abcd",
|
| - // then after this erase, we will have written "abc,-" and set segment="d"
|
| - } else {
|
| - dest->append(*segment);
|
| - segment->clear();
|
| - }
|
| - }
|
| -}
|
| -
|
| -void UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix,
|
| - const string& escaped_ending,
|
| - char dir_separator,
|
| - string* encoded_filename) {
|
| - string filename_ending = UrlUtilities::Unescape(escaped_ending);
|
| -
|
| - char encoded[3];
|
| - int encoded_len;
|
| - string segment;
|
| -
|
| - // TODO(jmarantz): This code would be a bit simpler if we disallowed
|
| - // Instaweb allowing filename_prefix to not end in "/". We could
|
| - // then change the is routine to just take one input string.
|
| - size_t start_of_segment = filename_prefix.find_last_of(dir_separator);
|
| - if (start_of_segment == string::npos) {
|
| - segment = filename_prefix;
|
| - } else {
|
| - segment = filename_prefix.substr(start_of_segment + 1);
|
| - *encoded_filename = filename_prefix.substr(0, start_of_segment + 1);
|
| - }
|
| -
|
| - size_t index = 0;
|
| - // Special case the first / to avoid adding a leading kEscapeChar.
|
| - if (!filename_ending.empty() && (filename_ending[0] == dir_separator)) {
|
| - encoded_filename->append(segment);
|
| - segment.clear();
|
| - encoded_filename->append(1, dir_separator);
|
| - ++index;
|
| - }
|
| -
|
| - for (; index < filename_ending.length(); ++index) {
|
| - unsigned char ch = static_cast<unsigned char>(filename_ending[index]);
|
| -
|
| - // Note: instead of outputing an empty segment, we let the second slash
|
| - // be escaped below.
|
| - if ((ch == dir_separator) && !segment.empty()) {
|
| - AppendSegment(&segment, encoded_filename);
|
| - encoded_filename->append(1, dir_separator);
|
| - segment.clear();
|
| - } else {
|
| - // After removing unsafe chars the only safe ones are _.=+- and alphanums.
|
| - if ((ch == '_') || (ch == '.') || (ch == '=') || (ch == '+') ||
|
| - (ch == '-') || (('0' <= ch) && (ch <= '9')) ||
|
| - (('A' <= ch) && (ch <= 'Z')) || (('a' <= ch) && (ch <= 'z'))) {
|
| - encoded[0] = ch;
|
| - encoded_len = 1;
|
| - } else {
|
| - encoded[0] = kEscapeChar;
|
| - encoded[1] = ch / 16;
|
| - encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
|
| - encoded[2] = ch % 16;
|
| - encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
|
| - encoded_len = 3;
|
| - }
|
| - segment.append(encoded, encoded_len);
|
| -
|
| - // If segment is too big, we must chop it into chunks.
|
| - if (segment.size() > kMaximumSubdirectoryLength) {
|
| - AppendSegment(&segment, encoded_filename);
|
| - encoded_filename->append(1, dir_separator);
|
| - }
|
| - }
|
| - }
|
| -
|
| - // Append "," to the leaf filename so the leaf can also be a branch., e.g.
|
| - // allow http://a/b/c and http://a/b/c/d to co-exist as files "/a/b/c," and
|
| - // /a/b/c/d". So we will rename the "d" here to "d,". If doing that pushed
|
| - // us over the 128 char limit, then we will need to append "/" and the
|
| - // remaining chars.
|
| - segment += kEscapeChar;
|
| - AppendSegment(&segment, encoded_filename);
|
| - if (!segment.empty()) {
|
| - // The last overflow segment is special, because we appended in
|
| - // kEscapeChar above. We won't need to check it again for size
|
| - // or further escaping.
|
| - encoded_filename->append(1, dir_separator);
|
| - encoded_filename->append(segment);
|
| - }
|
| -}
|
| -
|
| -// Note: this decoder is not the exact inverse of the EncodeSegment above,
|
| -// because it does not take into account a prefix.
|
| -bool UrlToFilenameEncoder::Decode(const string& encoded_filename,
|
| - char dir_separator,
|
| - string* decoded_url) {
|
| - enum State { kStart, kEscape, kFirstDigit, kTruncate, kEscapeDot };
|
| - State state = kStart;
|
| - char hex_buffer[3];
|
| - hex_buffer[2] = '\0';
|
| - for (size_t i = 0; i < encoded_filename.size(); ++i) {
|
| - char ch = encoded_filename[i];
|
| - switch (state) {
|
| - case kStart:
|
| - if (ch == kEscapeChar) {
|
| - state = kEscape;
|
| - } else if (ch == dir_separator) {
|
| - decoded_url->append(1, '/'); // URLs only use '/' not '\\'
|
| - } else {
|
| - decoded_url->append(1, ch);
|
| - }
|
| - break;
|
| - case kEscape:
|
| - if (base::IsHexDigit(ch)) {
|
| - hex_buffer[0] = ch;
|
| - state = kFirstDigit;
|
| - } else if (ch == kTruncationChar) {
|
| - state = kTruncate;
|
| - } else if (ch == '.') {
|
| - decoded_url->append(1, '.');
|
| - state = kEscapeDot; // Look for at most one more dot.
|
| - } else if (ch == dir_separator) {
|
| - // Consider url "//x". This was once encoded to "/,/x,".
|
| - // This code is what skips the first Escape.
|
| - decoded_url->append(1, '/'); // URLs only use '/' not '\\'
|
| - state = kStart;
|
| - } else {
|
| - return false;
|
| - }
|
| - break;
|
| - case kFirstDigit:
|
| - if (base::IsHexDigit(ch)) {
|
| - hex_buffer[1] = ch;
|
| - uint64_t hex_value = ParseLeadingHex64Value(hex_buffer, 0);
|
| - decoded_url->append(1, static_cast<char>(hex_value));
|
| - state = kStart;
|
| - } else {
|
| - return false;
|
| - }
|
| - break;
|
| - case kTruncate:
|
| - if (ch == dir_separator) {
|
| - // Skip this separator, it was only put in to break up long
|
| - // path segments, but is not part of the URL.
|
| - state = kStart;
|
| - } else {
|
| - return false;
|
| - }
|
| - break;
|
| - case kEscapeDot:
|
| - decoded_url->append(1, ch);
|
| - state = kStart;
|
| - break;
|
| - }
|
| - }
|
| -
|
| - // All legal encoded filenames end in kEscapeChar.
|
| - return (state == kEscape);
|
| -}
|
| -
|
| -// Escape the given input |path| and chop any individual components
|
| -// of the path which are greater than kMaximumSubdirectoryLength characters
|
| -// into two chunks.
|
| -//
|
| -// This legacy version has several issues with aliasing of different URLs,
|
| -// inability to represent both /a/b/c and /a/b/c/d, and inability to decode
|
| -// the filenames back into URLs.
|
| -//
|
| -// But there is a large body of slurped data which depends on this format,
|
| -// so leave it as the default for spdy_in_mem_edsm_server.
|
| -string UrlToFilenameEncoder::LegacyEscape(const string& path) {
|
| - string output;
|
| -
|
| - // Note: We also chop paths into medium sized 'chunks'.
|
| - // This is due to the incompetence of the windows
|
| - // filesystem, which still hasn't figured out how
|
| - // to deal with long filenames.
|
| - int last_slash = 0;
|
| - for (size_t index = 0; index < path.length(); index++) {
|
| - char ch = path[index];
|
| - if (ch == 0x5C)
|
| - last_slash = index;
|
| - if ((ch == 0x2D) || // hyphen
|
| - (ch == 0x5C) || (ch == 0x5F) || // backslash, underscore
|
| - ((0x30 <= ch) && (ch <= 0x39)) || // Digits [0-9]
|
| - ((0x41 <= ch) && (ch <= 0x5A)) || // Uppercase [A-Z]
|
| - ((0x61 <= ch) && (ch <= 0x7A))) { // Lowercase [a-z]
|
| - output.append(&path[index], 1);
|
| - } else {
|
| - char encoded[3];
|
| - encoded[0] = 'x';
|
| - encoded[1] = ch / 16;
|
| - encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
|
| - encoded[2] = ch % 16;
|
| - encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
|
| - output.append(encoded, 3);
|
| - }
|
| - if (index - last_slash > kMaximumSubdirectoryLength) {
|
| -#ifdef WIN32
|
| - char slash = '\\';
|
| -#else
|
| - char slash = '/';
|
| -#endif
|
| - output.append(&slash, 1);
|
| - last_slash = index;
|
| - }
|
| - }
|
| - return output;
|
| -}
|
| -
|
| -} // namespace net
|
|
|