net/tools/dump_cache/url_to_filename_encoder.cc - Issue 992733002: Remove //net (except for Android test stuff) and sdch

Side by Side Diff: net/tools/dump_cache/url_to_filename_encoder.cc

Issue 992733002: Remove //net (except for Android test stuff) and sdch (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include <stdlib.h>

6

7 #include "base/logging.h"

8 #include "base/strings/string_util.h"

9 #include "net/base/net_util.h"

10 #include "net/tools/dump_cache/url_to_filename_encoder.h"

11

12 using std::string;

13

14 namespace {

15

16 // Returns 1 if buf is prefixed by "num_digits" of hex digits

17 // Teturns 0 otherwise.

18 // The function checks for '\0' for string termination.

19 int HexDigitsPrefix(const char* buf, int num_digits) {

20 for (int i = 0; i < num_digits; i++) {

21 if (!IsHexDigit(buf[i]))

22 return 0; // This also detects end of string as '\0' is not xdigit.

23 }

24 return 1;

25 }

26

27 #ifdef WIN32

28 #define strtoull _strtoui64

29 #endif

30

31 // A simple parser for long long values. Returns the parsed value if a

32 // valid integer is found; else returns deflt

33 // UInt64 and Int64 cannot handle decimal numbers with leading 0s.

34 uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {

35 char *error = NULL;

36 const uint64 value = strtoull(str, &error, 16);

37 return (error == str) ? deflt : value;

38 }

39

40 }

41

42 namespace net {

43

44 // The escape character choice is made here -- all code and tests in this

45 // directory are based off of this constant. However, our testdata

46 // has tons of dependencies on this, so it cannot be changed without

47 // re-running those tests and fixing them.

48 const char UrlToFilenameEncoder::kEscapeChar = ',';

49 const char UrlToFilenameEncoder::kTruncationChar = '-';

50 const size_t UrlToFilenameEncoder::kMaximumSubdirectoryLength = 128;

51

52 void UrlToFilenameEncoder::AppendSegment(string* segment, string* dest) {

53 CHECK(!segment->empty());

54 if ((segment == ".") \|\| (segment == "..")) {

55 dest->append(1, kEscapeChar);

56 dest->append(*segment);

57 segment->clear();

58 } else {

59 size_t segment_size = segment->size();

60 if (segment_size > kMaximumSubdirectoryLength) {

61 // We need to inject ",-" at the end of the segment to signify that

62 // we are inserting an artificial '/'. This means we have to chop

63 // off at least two characters to make room.

64 segment_size = kMaximumSubdirectoryLength - 2;

65

66 // But we don't want to break up an escape sequence that happens to lie at

67 // the end. Escape sequences are at most 2 characters.

68 if ((*segment)[segment_size - 1] == kEscapeChar) {

69 segment_size -= 1;

70 } else if ((*segment)[segment_size - 2] == kEscapeChar) {

71 segment_size -= 2;

72 }

73 dest->append(segment->data(), segment_size);

74 dest->append(1, kEscapeChar);

75 dest->append(1, kTruncationChar);

76 segment->erase(0, segment_size);

77

78 // At this point, if we had segment_size=3, and segment="abcd",

79 // then after this erase, we will have written "abc,-" and set segment="d"

80 } else {

81 dest->append(*segment);

82 segment->clear();

83 }

84 }

85 }

86

87 void UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix,

88 const string& escaped_ending,

89 char dir_separator,

90 string* encoded_filename) {

91 string filename_ending = UrlUtilities::Unescape(escaped_ending);

92

93 char encoded[3];

94 int encoded_len;

95 string segment;

96

97 // TODO(jmarantz): This code would be a bit simpler if we disallowed

98 // Instaweb allowing filename_prefix to not end in "/". We could

99 // then change the is routine to just take one input string.

100 size_t start_of_segment = filename_prefix.find_last_of(dir_separator);

101 if (start_of_segment == string::npos) {

102 segment = filename_prefix;

103 } else {

104 segment = filename_prefix.substr(start_of_segment + 1);

105 *encoded_filename = filename_prefix.substr(0, start_of_segment + 1);

106 }

107

108 size_t index = 0;

109 // Special case the first / to avoid adding a leading kEscapeChar.

110 if (!filename_ending.empty() && (filename_ending[0] == dir_separator)) {

111 encoded_filename->append(segment);

112 segment.clear();

113 encoded_filename->append(1, dir_separator);

114 ++index;

115 }

116

117 for (; index < filename_ending.length(); ++index) {

118 unsigned char ch = static_cast<unsigned char>(filename_ending[index]);

119

120 // Note: instead of outputing an empty segment, we let the second slash

121 // be escaped below.

122 if ((ch == dir_separator) && !segment.empty()) {

123 AppendSegment(&segment, encoded_filename);

124 encoded_filename->append(1, dir_separator);

125 segment.clear();

126 } else {

127 // After removing unsafe chars the only safe ones are _.=+- and alphanums.

128 if ((ch == '_') \|\| (ch == '.') \|\| (ch == '=') \|\| (ch == '+') \|\|

129 (ch == '-') \|\| (('0' <= ch) && (ch <= '9')) \|\|

130 (('A' <= ch) && (ch <= 'Z')) \|\| (('a' <= ch) && (ch <= 'z'))) {

131 encoded[0] = ch;

132 encoded_len = 1;

133 } else {

134 encoded[0] = kEscapeChar;

135 encoded[1] = ch / 16;

136 encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';

137 encoded[2] = ch % 16;

138 encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';

139 encoded_len = 3;

140 }

141 segment.append(encoded, encoded_len);

142

143 // If segment is too big, we must chop it into chunks.

144 if (segment.size() > kMaximumSubdirectoryLength) {

145 AppendSegment(&segment, encoded_filename);

146 encoded_filename->append(1, dir_separator);

147 }

148 }

149 }

150

151 // Append "," to the leaf filename so the leaf can also be a branch., e.g.

152 // allow http://a/b/c and http://a/b/c/d to co-exist as files "/a/b/c," and

153 // /a/b/c/d". So we will rename the "d" here to "d,". If doing that pushed

154 // us over the 128 char limit, then we will need to append "/" and the

155 // remaining chars.

156 segment += kEscapeChar;

157 AppendSegment(&segment, encoded_filename);

158 if (!segment.empty()) {

159 // The last overflow segment is special, because we appended in

160 // kEscapeChar above. We won't need to check it again for size

161 // or further escaping.

162 encoded_filename->append(1, dir_separator);

163 encoded_filename->append(segment);

164 }

165 }

166

167 // Note: this decoder is not the exact inverse of the EncodeSegment above,

168 // because it does not take into account a prefix.

169 bool UrlToFilenameEncoder::Decode(const string& encoded_filename,

170 char dir_separator,

171 string* decoded_url) {

172 enum State {

173 kStart,

174 kEscape,

175 kFirstDigit,

176 kTruncate,

177 kEscapeDot

178 };

179 State state = kStart;

180 char hex_buffer[3];

181 hex_buffer[2] = '\0';

182 for (size_t i = 0; i < encoded_filename.size(); ++i) {

183 char ch = encoded_filename[i];

184 switch (state) {

185 case kStart:

186 if (ch == kEscapeChar) {

187 state = kEscape;

188 } else if (ch == dir_separator) {

189 decoded_url->append(1, '/'); // URLs only use '/' not '\\'

190 } else {

191 decoded_url->append(1, ch);

192 }

193 break;

194 case kEscape:

195 if (HexDigitsPrefix(&ch, 1) == 1) {

196 hex_buffer[0] = ch;

197 state = kFirstDigit;

198 } else if (ch == kTruncationChar) {

199 state = kTruncate;

200 } else if (ch == '.') {

201 decoded_url->append(1, '.');

202 state = kEscapeDot; // Look for at most one more dot.

203 } else if (ch == dir_separator) {

204 // Consider url "//x". This was once encoded to "/,/x,".

205 // This code is what skips the first Escape.

206 decoded_url->append(1, '/'); // URLs only use '/' not '\\'

207 state = kStart;

208 } else {

209 return false;

210 }

211 break;

212 case kFirstDigit:

213 if (HexDigitsPrefix(&ch, 1) == 1) {

214 hex_buffer[1] = ch;

215 uint64 hex_value = ParseLeadingHex64Value(hex_buffer, 0);

216 decoded_url->append(1, static_cast<char>(hex_value));

217 state = kStart;

218 } else {

219 return false;

220 }

221 break;

222 case kTruncate:

223 if (ch == dir_separator) {

224 // Skip this separator, it was only put in to break up long

225 // path segments, but is not part of the URL.

226 state = kStart;

227 } else {

228 return false;

229 }

230 break;

231 case kEscapeDot:

232 decoded_url->append(1, ch);

233 state = kStart;

234 break;

235 }

236 }

237

238 // All legal encoded filenames end in kEscapeChar.

239 return (state == kEscape);

240 }

241

242 // Escape the given input \|path\| and chop any individual components

243 // of the path which are greater than kMaximumSubdirectoryLength characters

244 // into two chunks.

245 //

246 // This legacy version has several issues with aliasing of different URLs,

247 // inability to represent both /a/b/c and /a/b/c/d, and inability to decode

248 // the filenames back into URLs.

249 //

250 // But there is a large body of slurped data which depends on this format,

251 // so leave it as the default for spdy_in_mem_edsm_server.

252 string UrlToFilenameEncoder::LegacyEscape(const string& path) {

253 string output;

254

255 // Note: We also chop paths into medium sized 'chunks'.

256 // This is due to the incompetence of the windows

257 // filesystem, which still hasn't figured out how

258 // to deal with long filenames.

259 int last_slash = 0;

260 for (size_t index = 0; index < path.length(); index++) {

261 char ch = path[index];

262 if (ch == 0x5C)

263 last_slash = index;

264 if ((ch == 0x2D) \|\| // hyphen

265 (ch == 0x5C) \|\| (ch == 0x5F) \|\| // backslash, underscore

266 ((0x30 <= ch) && (ch <= 0x39)) \|\| // Digits [0-9]

267 ((0x41 <= ch) && (ch <= 0x5A)) \|\| // Uppercase [A-Z]

268 ((0x61 <= ch) && (ch <= 0x7A))) { // Lowercase [a-z]

269 output.append(&path[index], 1);

270 } else {

271 char encoded[3];

272 encoded[0] = 'x';

273 encoded[1] = ch / 16;

274 encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';

275 encoded[2] = ch % 16;

276 encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';

277 output.append(encoded, 3);

278 }

279 if (index - last_slash > kMaximumSubdirectoryLength) {

280 #ifdef WIN32

281 char slash = '\\';

282 #else

283 char slash = '/';

284 #endif

285 output.append(&slash, 1);

286 last_slash = index;

287 }

288 }

289 return output;

290 }

291

292 } // namespace net

OLD	NEW

« no previous file with comments | « net/tools/dump_cache/url_to_filename_encoder.h ('k') | net/tools/dump_cache/url_to_filename_encoder_unittest.cc » ('j') | no next file with comments »