net/tools/flip_server/url_to_filename_encoder.cc - Issue 2169503002: Remove flip_server.

Side by Side Diff: net/tools/flip_server/url_to_filename_encoder.cc

Issue 2169503002: Remove flip_server. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "net/tools/flip_server/url_to_filename_encoder.h"

6

7 #include <stdlib.h>

8

9 #include "base/logging.h"

10 #include "base/strings/string_util.h"

11

12 using std::string;

13

14 namespace {

15

16 #ifdef WIN32

17 #define strtoull _strtoui64

18 #endif

19

// Parses the hexadecimal number at the start of |str| using strtoull with
// base 16. Returns the parsed value, or |deflt| when strtoull could not
// convert anything. Any characters following the number are ignored.
uint64_t ParseLeadingHex64Value(const char* str, uint64_t deflt) {
  char* end = nullptr;
  const uint64_t value = strtoull(str, &end, 16);
  // strtoull leaves |end| equal to |str| when no conversion was performed.
  return (end == str) ? deflt : value;
}

28

29 } // namespace

30

31 namespace net {

32

33 // The escape character is chosen here -- all code and tests in this

34 // directory are based on this constant. However, our test data

35 // depends heavily on it, so it cannot be changed without

36 // re-running those tests and fixing them.

37 const char UrlToFilenameEncoder::kEscapeChar = ',';

38 const char UrlToFilenameEncoder::kTruncationChar = '-';

39 const size_t UrlToFilenameEncoder::kMaximumSubdirectoryLength = 128;

40

41 void UrlToFilenameEncoder::AppendSegment(string* segment, string* dest) {

42 CHECK(!segment->empty());

43 if ((segment == ".") \|\| (segment == "..")) {

44 dest->append(1, kEscapeChar);

45 dest->append(*segment);

46 segment->clear();

47 } else {

48 size_t segment_size = segment->size();

49 if (segment_size > kMaximumSubdirectoryLength) {

50 // We need to inject ",-" at the end of the segment to signify that

51 // we are inserting an artificial '/'. This means we have to chop

52 // off at least two characters to make room.

53 segment_size = kMaximumSubdirectoryLength - 2;

54

55 // But we don't want to break up an escape sequence that happens to lie at

56 // the end. Escape sequences are at most 2 characters.

57 if ((*segment)[segment_size - 1] == kEscapeChar) {

58 segment_size -= 1;

59 } else if ((*segment)[segment_size - 2] == kEscapeChar) {

60 segment_size -= 2;

61 }

62 dest->append(segment->data(), segment_size);

63 dest->append(1, kEscapeChar);

64 dest->append(1, kTruncationChar);

65 segment->erase(0, segment_size);

66

67 // At this point, if we had segment_size=3, and segment="abcd",

68 // then after this erase, we will have written "abc,-" and set segment="d"

69 } else {

70 dest->append(*segment);

71 segment->clear();

72 }

73 }

74 }

75

76 void UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix,

77 const string& escaped_ending,

78 char dir_separator,

79 string* encoded_filename) {

80 string filename_ending = UrlUtilities::Unescape(escaped_ending);

81

82 char encoded[3];

83 int encoded_len;

84 string segment;

85

86 // TODO(jmarantz): This code would be a bit simpler if we disallowed

87 // Instaweb allowing filename_prefix to not end in "/". We could

88 // then change the is routine to just take one input string.

89 size_t start_of_segment = filename_prefix.find_last_of(dir_separator);

90 if (start_of_segment == string::npos) {

91 segment = filename_prefix;

92 } else {

93 segment = filename_prefix.substr(start_of_segment + 1);

94 *encoded_filename = filename_prefix.substr(0, start_of_segment + 1);

95 }

96

97 size_t index = 0;

98 // Special case the first / to avoid adding a leading kEscapeChar.

99 if (!filename_ending.empty() && (filename_ending[0] == dir_separator)) {

100 encoded_filename->append(segment);

101 segment.clear();

102 encoded_filename->append(1, dir_separator);

103 ++index;

104 }

105

106 for (; index < filename_ending.length(); ++index) {

107 unsigned char ch = static_cast<unsigned char>(filename_ending[index]);

108

109 // Note: instead of outputing an empty segment, we let the second slash

110 // be escaped below.

111 if ((ch == dir_separator) && !segment.empty()) {

112 AppendSegment(&segment, encoded_filename);

113 encoded_filename->append(1, dir_separator);

114 segment.clear();

115 } else {

116 // After removing unsafe chars the only safe ones are _.=+- and alphanums.

117 if ((ch == '_') \|\| (ch == '.') \|\| (ch == '=') \|\| (ch == '+') \|\|

118 (ch == '-') \|\| (('0' <= ch) && (ch <= '9')) \|\|

119 (('A' <= ch) && (ch <= 'Z')) \|\| (('a' <= ch) && (ch <= 'z'))) {

120 encoded[0] = ch;

121 encoded_len = 1;

122 } else {

123 encoded[0] = kEscapeChar;

124 encoded[1] = ch / 16;

125 encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';

126 encoded[2] = ch % 16;

127 encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';

128 encoded_len = 3;

129 }

130 segment.append(encoded, encoded_len);

131

132 // If segment is too big, we must chop it into chunks.

133 if (segment.size() > kMaximumSubdirectoryLength) {

134 AppendSegment(&segment, encoded_filename);

135 encoded_filename->append(1, dir_separator);

136 }

137 }

138 }

139

140 // Append "," to the leaf filename so the leaf can also be a branch., e.g.

141 // allow http://a/b/c and http://a/b/c/d to co-exist as files "/a/b/c," and

142 // /a/b/c/d". So we will rename the "d" here to "d,". If doing that pushed

143 // us over the 128 char limit, then we will need to append "/" and the

144 // remaining chars.

145 segment += kEscapeChar;

146 AppendSegment(&segment, encoded_filename);

147 if (!segment.empty()) {

148 // The last overflow segment is special, because we appended in

149 // kEscapeChar above. We won't need to check it again for size

150 // or further escaping.

151 encoded_filename->append(1, dir_separator);

152 encoded_filename->append(segment);

153 }

154 }

155

156 // Note: this decoder is not the exact inverse of the EncodeSegment above,

157 // because it does not take into account a prefix.

158 bool UrlToFilenameEncoder::Decode(const string& encoded_filename,

159 char dir_separator,

160 string* decoded_url) {

161 enum State { kStart, kEscape, kFirstDigit, kTruncate, kEscapeDot };

162 State state = kStart;

163 char hex_buffer[3];

164 hex_buffer[2] = '\0';

165 for (size_t i = 0; i < encoded_filename.size(); ++i) {

166 char ch = encoded_filename[i];

167 switch (state) {

168 case kStart:

169 if (ch == kEscapeChar) {

170 state = kEscape;

171 } else if (ch == dir_separator) {

172 decoded_url->append(1, '/'); // URLs only use '/' not '\\'

173 } else {

174 decoded_url->append(1, ch);

175 }

176 break;

177 case kEscape:

178 if (base::IsHexDigit(ch)) {

179 hex_buffer[0] = ch;

180 state = kFirstDigit;

181 } else if (ch == kTruncationChar) {

182 state = kTruncate;

183 } else if (ch == '.') {

184 decoded_url->append(1, '.');

185 state = kEscapeDot; // Look for at most one more dot.

186 } else if (ch == dir_separator) {

187 // Consider url "//x". This was once encoded to "/,/x,".

188 // This code is what skips the first Escape.

189 decoded_url->append(1, '/'); // URLs only use '/' not '\\'

190 state = kStart;

191 } else {

192 return false;

193 }

194 break;

195 case kFirstDigit:

196 if (base::IsHexDigit(ch)) {

197 hex_buffer[1] = ch;

198 uint64_t hex_value = ParseLeadingHex64Value(hex_buffer, 0);

199 decoded_url->append(1, static_cast<char>(hex_value));

200 state = kStart;

201 } else {

202 return false;

203 }

204 break;

205 case kTruncate:

206 if (ch == dir_separator) {

207 // Skip this separator, it was only put in to break up long

208 // path segments, but is not part of the URL.

209 state = kStart;

210 } else {

211 return false;

212 }

213 break;

214 case kEscapeDot:

215 decoded_url->append(1, ch);

216 state = kStart;

217 break;

218 }

219 }

220

221 // All legal encoded filenames end in kEscapeChar.

222 return (state == kEscape);

223 }

224

225 // Escape the given input \|path\| and chop any individual components

226 // of the path which are greater than kMaximumSubdirectoryLength characters

227 // into two chunks.

228 //

229 // This legacy version has several issues with aliasing of different URLs,

230 // inability to represent both /a/b/c and /a/b/c/d, and inability to decode

231 // the filenames back into URLs.

232 //

233 // But there is a large body of slurped data which depends on this format,

234 // so leave it as the default for spdy_in_mem_edsm_server.

235 string UrlToFilenameEncoder::LegacyEscape(const string& path) {

236 string output;

237

238 // Note: We also chop paths into medium sized 'chunks'.

239 // This is due to the incompetence of the windows

240 // filesystem, which still hasn't figured out how

241 // to deal with long filenames.

242 int last_slash = 0;

243 for (size_t index = 0; index < path.length(); index++) {

244 char ch = path[index];

245 if (ch == 0x5C)

246 last_slash = index;

247 if ((ch == 0x2D) \|\| // hyphen

248 (ch == 0x5C) \|\| (ch == 0x5F) \|\| // backslash, underscore

249 ((0x30 <= ch) && (ch <= 0x39)) \|\| // Digits [0-9]

250 ((0x41 <= ch) && (ch <= 0x5A)) \|\| // Uppercase [A-Z]

251 ((0x61 <= ch) && (ch <= 0x7A))) { // Lowercase [a-z]

252 output.append(&path[index], 1);

253 } else {

254 char encoded[3];

255 encoded[0] = 'x';

256 encoded[1] = ch / 16;

257 encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';

258 encoded[2] = ch % 16;

259 encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';

260 output.append(encoded, 3);

261 }

262 if (index - last_slash > kMaximumSubdirectoryLength) {

263 #ifdef WIN32

264 char slash = '\\';

265 #else

266 char slash = '/';

267 #endif

268 output.append(&slash, 1);

269 last_slash = index;

270 }

271 }

272 return output;

273 }

274

275 } // namespace net

OLD	NEW

« no previous file with comments | « net/tools/flip_server/url_to_filename_encoder.h ('k') | net/tools/flip_server/url_to_filename_encoder_unittest.cc » ('j') | no next file with comments »