Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1011)

Side by Side Diff: net/tools/flip_server/url_to_filename_encoder.cc

Issue 2169503002: Remove flip_server. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/tools/flip_server/url_to_filename_encoder.h"
6
7 #include <stdlib.h>
8
9 #include "base/logging.h"
10 #include "base/strings/string_util.h"
11
12 using std::string;
13
14 namespace {
15
// On builds where WIN32 is defined, alias strtoull to _strtoui64 so that
// ParseLeadingHex64Value() below can call strtoull unconditionally.
// NOTE(review): presumably needed because the Windows C runtime targeted here
// does not provide strtoull -- confirm before removing.
#ifdef WIN32
#define strtoull _strtoui64
#endif
19
// Parses the hexadecimal number found at the beginning of |str| (base 16, as
// interpreted by strtoull) and returns it. Characters following the number
// are ignored. If strtoull could not consume any characters at all, |deflt|
// is returned instead.
uint64_t ParseLeadingHex64Value(const char* str, uint64_t deflt) {
  char* end_of_number = NULL;
  const uint64_t parsed = strtoull(str, &end_of_number, 16);
  if (end_of_number == str) {
    // The end pointer never moved, so nothing was converted.
    return deflt;
  }
  return parsed;
}
28
29 } // namespace
30
31 namespace net {
32
// The escape character choice is made here -- all code and tests in this
// directory are based on this constant. However, our test data has a large
// number of dependencies on it, so it cannot be changed without re-running
// those tests and fixing them.
const char UrlToFilenameEncoder::kEscapeChar = ',';
// Written immediately after kEscapeChar by AppendSegment() to mark the spot
// where an over-long segment was cut.
const char UrlToFilenameEncoder::kTruncationChar = '-';
// Segments longer than this many characters are split by AppendSegment();
// LegacyEscape() uses the same limit to decide when to insert a separator.
const size_t UrlToFilenameEncoder::kMaximumSubdirectoryLength = 128;
40
41 void UrlToFilenameEncoder::AppendSegment(string* segment, string* dest) {
42 CHECK(!segment->empty());
43 if ((*segment == ".") || (*segment == "..")) {
44 dest->append(1, kEscapeChar);
45 dest->append(*segment);
46 segment->clear();
47 } else {
48 size_t segment_size = segment->size();
49 if (segment_size > kMaximumSubdirectoryLength) {
50 // We need to inject ",-" at the end of the segment to signify that
51 // we are inserting an artificial '/'. This means we have to chop
52 // off at least two characters to make room.
53 segment_size = kMaximumSubdirectoryLength - 2;
54
55 // But we don't want to break up an escape sequence that happens to lie at
56 // the end. Escape sequences are at most 2 characters.
57 if ((*segment)[segment_size - 1] == kEscapeChar) {
58 segment_size -= 1;
59 } else if ((*segment)[segment_size - 2] == kEscapeChar) {
60 segment_size -= 2;
61 }
62 dest->append(segment->data(), segment_size);
63 dest->append(1, kEscapeChar);
64 dest->append(1, kTruncationChar);
65 segment->erase(0, segment_size);
66
67 // At this point, if we had segment_size=3, and segment="abcd",
68 // then after this erase, we will have written "abc,-" and set segment="d"
69 } else {
70 dest->append(*segment);
71 segment->clear();
72 }
73 }
74 }
75
76 void UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix,
77 const string& escaped_ending,
78 char dir_separator,
79 string* encoded_filename) {
80 string filename_ending = UrlUtilities::Unescape(escaped_ending);
81
82 char encoded[3];
83 int encoded_len;
84 string segment;
85
86 // TODO(jmarantz): This code would be a bit simpler if we disallowed
87 // Instaweb allowing filename_prefix to not end in "/". We could
88 // then change the is routine to just take one input string.
89 size_t start_of_segment = filename_prefix.find_last_of(dir_separator);
90 if (start_of_segment == string::npos) {
91 segment = filename_prefix;
92 } else {
93 segment = filename_prefix.substr(start_of_segment + 1);
94 *encoded_filename = filename_prefix.substr(0, start_of_segment + 1);
95 }
96
97 size_t index = 0;
98 // Special case the first / to avoid adding a leading kEscapeChar.
99 if (!filename_ending.empty() && (filename_ending[0] == dir_separator)) {
100 encoded_filename->append(segment);
101 segment.clear();
102 encoded_filename->append(1, dir_separator);
103 ++index;
104 }
105
106 for (; index < filename_ending.length(); ++index) {
107 unsigned char ch = static_cast<unsigned char>(filename_ending[index]);
108
109 // Note: instead of outputing an empty segment, we let the second slash
110 // be escaped below.
111 if ((ch == dir_separator) && !segment.empty()) {
112 AppendSegment(&segment, encoded_filename);
113 encoded_filename->append(1, dir_separator);
114 segment.clear();
115 } else {
116 // After removing unsafe chars the only safe ones are _.=+- and alphanums.
117 if ((ch == '_') || (ch == '.') || (ch == '=') || (ch == '+') ||
118 (ch == '-') || (('0' <= ch) && (ch <= '9')) ||
119 (('A' <= ch) && (ch <= 'Z')) || (('a' <= ch) && (ch <= 'z'))) {
120 encoded[0] = ch;
121 encoded_len = 1;
122 } else {
123 encoded[0] = kEscapeChar;
124 encoded[1] = ch / 16;
125 encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
126 encoded[2] = ch % 16;
127 encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
128 encoded_len = 3;
129 }
130 segment.append(encoded, encoded_len);
131
132 // If segment is too big, we must chop it into chunks.
133 if (segment.size() > kMaximumSubdirectoryLength) {
134 AppendSegment(&segment, encoded_filename);
135 encoded_filename->append(1, dir_separator);
136 }
137 }
138 }
139
140 // Append "," to the leaf filename so the leaf can also be a branch., e.g.
141 // allow http://a/b/c and http://a/b/c/d to co-exist as files "/a/b/c," and
142 // /a/b/c/d". So we will rename the "d" here to "d,". If doing that pushed
143 // us over the 128 char limit, then we will need to append "/" and the
144 // remaining chars.
145 segment += kEscapeChar;
146 AppendSegment(&segment, encoded_filename);
147 if (!segment.empty()) {
148 // The last overflow segment is special, because we appended in
149 // kEscapeChar above. We won't need to check it again for size
150 // or further escaping.
151 encoded_filename->append(1, dir_separator);
152 encoded_filename->append(segment);
153 }
154 }
155
156 // Note: this decoder is not the exact inverse of the EncodeSegment above,
157 // because it does not take into account a prefix.
158 bool UrlToFilenameEncoder::Decode(const string& encoded_filename,
159 char dir_separator,
160 string* decoded_url) {
161 enum State { kStart, kEscape, kFirstDigit, kTruncate, kEscapeDot };
162 State state = kStart;
163 char hex_buffer[3];
164 hex_buffer[2] = '\0';
165 for (size_t i = 0; i < encoded_filename.size(); ++i) {
166 char ch = encoded_filename[i];
167 switch (state) {
168 case kStart:
169 if (ch == kEscapeChar) {
170 state = kEscape;
171 } else if (ch == dir_separator) {
172 decoded_url->append(1, '/'); // URLs only use '/' not '\\'
173 } else {
174 decoded_url->append(1, ch);
175 }
176 break;
177 case kEscape:
178 if (base::IsHexDigit(ch)) {
179 hex_buffer[0] = ch;
180 state = kFirstDigit;
181 } else if (ch == kTruncationChar) {
182 state = kTruncate;
183 } else if (ch == '.') {
184 decoded_url->append(1, '.');
185 state = kEscapeDot; // Look for at most one more dot.
186 } else if (ch == dir_separator) {
187 // Consider url "//x". This was once encoded to "/,/x,".
188 // This code is what skips the first Escape.
189 decoded_url->append(1, '/'); // URLs only use '/' not '\\'
190 state = kStart;
191 } else {
192 return false;
193 }
194 break;
195 case kFirstDigit:
196 if (base::IsHexDigit(ch)) {
197 hex_buffer[1] = ch;
198 uint64_t hex_value = ParseLeadingHex64Value(hex_buffer, 0);
199 decoded_url->append(1, static_cast<char>(hex_value));
200 state = kStart;
201 } else {
202 return false;
203 }
204 break;
205 case kTruncate:
206 if (ch == dir_separator) {
207 // Skip this separator, it was only put in to break up long
208 // path segments, but is not part of the URL.
209 state = kStart;
210 } else {
211 return false;
212 }
213 break;
214 case kEscapeDot:
215 decoded_url->append(1, ch);
216 state = kStart;
217 break;
218 }
219 }
220
221 // All legal encoded filenames end in kEscapeChar.
222 return (state == kEscape);
223 }
224
225 // Escape the given input |path| and chop any individual components
226 // of the path which are greater than kMaximumSubdirectoryLength characters
227 // into two chunks.
228 //
229 // This legacy version has several issues with aliasing of different URLs,
230 // inability to represent both /a/b/c and /a/b/c/d, and inability to decode
231 // the filenames back into URLs.
232 //
233 // But there is a large body of slurped data which depends on this format,
234 // so leave it as the default for spdy_in_mem_edsm_server.
235 string UrlToFilenameEncoder::LegacyEscape(const string& path) {
236 string output;
237
238 // Note: We also chop paths into medium sized 'chunks'.
239 // This is due to the incompetence of the windows
240 // filesystem, which still hasn't figured out how
241 // to deal with long filenames.
242 int last_slash = 0;
243 for (size_t index = 0; index < path.length(); index++) {
244 char ch = path[index];
245 if (ch == 0x5C)
246 last_slash = index;
247 if ((ch == 0x2D) || // hyphen
248 (ch == 0x5C) || (ch == 0x5F) || // backslash, underscore
249 ((0x30 <= ch) && (ch <= 0x39)) || // Digits [0-9]
250 ((0x41 <= ch) && (ch <= 0x5A)) || // Uppercase [A-Z]
251 ((0x61 <= ch) && (ch <= 0x7A))) { // Lowercase [a-z]
252 output.append(&path[index], 1);
253 } else {
254 char encoded[3];
255 encoded[0] = 'x';
256 encoded[1] = ch / 16;
257 encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
258 encoded[2] = ch % 16;
259 encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
260 output.append(encoded, 3);
261 }
262 if (index - last_slash > kMaximumSubdirectoryLength) {
263 #ifdef WIN32
264 char slash = '\\';
265 #else
266 char slash = '/';
267 #endif
268 output.append(&slash, 1);
269 last_slash = index;
270 }
271 }
272 return output;
273 }
274
275 } // namespace net
OLDNEW
« no previous file with comments | « net/tools/flip_server/url_to_filename_encoder.h ('k') | net/tools/flip_server/url_to_filename_encoder_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698