chrome/browser/url_fixer_upper.cc - Issue 18305: Move url_* to net subdir

Side by Side Diff: chrome/browser/url_fixer_upper.cc

Issue 18305: Move url_* to net subdir (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 11 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include <algorithm>

6 #include <windows.h>

7

8 #include "chrome/browser/url_fixer_upper.h"

9

10 #include "base/file_util.h"

11 #include "base/logging.h"

12 #include "base/string_util.h"

13 #include "chrome/common/gfx/text_elider.h"

14 #include "googleurl/src/gurl.h"

15 #include "googleurl/src/url_canon.h"

16 #include "googleurl/src/url_file.h"

17 #include "googleurl/src/url_parse.h"

18 #include "googleurl/src/url_util.h"

19 #include "net/base/escape.h"

20 #include "net/base/net_util.h"

21 #include "net/base/registry_controlled_domain.h"

22

23 using namespace std;

24

25 // does some basic fixes for input that we want to test for file-ness

26 static void PrepareStringForFileOps(const wstring& text, wstring* output) {

27 TrimWhitespace(text, TRIM_ALL, output);

28 replace(output->begin(), output->end(), '/', '\\');

29 }

30

31 // Tries to create a full path from \|text\|. If the result is valid and the

32 // file exists, returns true and sets \|full_path\| to the result. Otherwise,

33 // returns false and leaves \|full_path\| unchanged.

34 static bool ValidPathForFile(const wstring& text, wstring* full_path) {

35 wchar_t file_path[MAX_PATH];

36 if (!_wfullpath(file_path, text.c_str(), MAX_PATH))

37 return false;

38

39 if (!file_util::PathExists(file_path))

40 return false;

41

42 full_path->assign(file_path);

43 return true;

44 }

45

46 // Tries to create a file: URL from \|text\| if it looks like a filename, even if

47 // it doesn't resolve as a valid path or to an existing file. Returns true

48 // with a (possibly invalid) file: URL in \|fixed_up_url\| for input beginning

49 // with a drive specifier or "\\". Returns false in other cases (including

50 // file: URLs: these don't look like filenames), leaving fixed_up_url

51 // unchanged.

52 static wstring FixupPath(const wstring& text) {

53 DCHECK(text.length() >= 2);

54

55 wstring filename;

56 PrepareStringForFileOps(text, &filename);

57

58 if (filename[1] == '\|')

59 filename[1] = ':';

60

61 // Here, we know the input looks like a file.

62 GURL file_url = net::FilePathToFileURL(filename);

63 if (file_url.is_valid())

64 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring());

65

66 // Invalid file URL, just return the input.

67 return text;

68 }

69

70 // Checks \|domain\| to see if a valid TLD is already present. If not, appends

71 // \|desired_tld\| to the domain, and prepends "www." unless it's already present.

72 // Then modifies \|fixed_up_url\| to reflect the changes.

73 static void AddDesiredTLD(const wstring& desired_tld,

74 wstring* domain) {

75 if (desired_tld.empty() \|\| domain->empty())

76 return;

77

78 // Check the TLD. If the return value is positive, we already have a TLD, so

79 // abort; if the return value is wstring::npos, there's no valid host (e.g. if

80 // the user pasted in garbage for which HistoryURLProvider is trying to

81 // suggest an exact match), so adding a TLD makes no sense. The only useful

82 // case is where the return value is 0 (there's a valid host with no known

83 // TLD). We disallow unknown registries here so users can input "mail.yahoo"

84 // and hit ctrl-enter to get "www.mail.yahoo.com".

85 const size_t registry_length =

86 net::RegistryControlledDomainService::GetRegistryLength(*domain, false);

87 if (registry_length != 0)

88 return;

89

90 // Add the suffix at the end of the domain.

91 const size_t domain_length(domain->length());

92 DCHECK(domain_length > 0);

93 DCHECK(desired_tld[0] != '.');

94 if ((*domain)[domain_length - 1] != '.')

95 domain->push_back('.');

96 domain->append(desired_tld);

97

98 // Now, if the domain begins with "www.", stop.

99 const wstring prefix(L"www.");

100 if (domain->compare(0, prefix.length(), prefix) != 0) {

101 // Otherwise, add www. to the beginning of the URL.

102 domain->insert(0, prefix);

103 }

104 }

105

106 static inline void FixupUsername(const wstring& text,

107 const url_parse::Component& part,

108 wstring* url) {

109 if (!part.is_valid())

110 return;

111

112 // We don't fix up the username at the moment.

113 url->append(text, part.begin, part.len);

114 // Do not append the trailing '@' because we might need to include the user's

115 // password. FixupURL itself will append the '@' for us.

116 }

117

118 static inline void FixupPassword(const wstring& text,

119 const url_parse::Component& part,

120 wstring* url) {

121 if (!part.is_valid())

122 return;

123

124 // We don't fix up the password at the moment.

125 url->append(L":");

126 url->append(text, part.begin, part.len);

127 }

128

129 static void FixupHost(const wstring& text,

130 const url_parse::Component& part,

131 bool has_scheme,

132 const wstring& desired_tld,

133 wstring* url) {

134 if (!part.is_valid())

135 return;

136

137 // Make domain valid.

138 // Strip all leading dots and all but one trailing dot, unless the user only

139 // typed dots, in which case their input is totally invalid and we should just

140 // leave it unchanged.

141 wstring domain(text, part.begin, part.len);

142 const size_t first_nondot(domain.find_first_not_of('.'));

143 if (first_nondot != wstring::npos) {

144 domain.erase(0, first_nondot);

145 size_t last_nondot(domain.find_last_not_of('.'));

146 DCHECK(last_nondot != wstring::npos);

147 last_nondot += 2; // Point at second period in ending string

148 if (last_nondot < domain.length())

149 domain.erase(last_nondot);

150 }

151

152 // Add any user-specified TLD, if applicable.

153 AddDesiredTLD(desired_tld, &domain);

154

155 url->append(domain);

156 }

157

158 // Looks for a port number, including initial colon, at port_start. If

159 // something invalid (which cannot be fixed up) is found, like ":foo" or

160 // ":7:7", returns false. Otherwise, removes any extra colons

161 // ("::1337" -> ":1337", ":/" -> "/") and returns true.

162 static void FixupPort(const wstring& text,

163 const url_parse::Component& part,

164 wstring* url) {

165 if (!part.is_valid())

166 return;

167

168 // Look for non-digit in port and strip if found.

169 wstring port(text, part.begin, part.len);

170 for (wstring::iterator i = port.begin(); i != port.end(); ) {

171 if (IsAsciiDigit(*i))

172 ++i;

173 else

174 i = port.erase(i);

175 }

176

177 if (port.empty())

178 return; // Nothing to append.

179

180 url->append(L":");

181 url->append(port);

182 }

183

184 static inline void FixupPath(const wstring& text,

185 const url_parse::Component& part,

186 wstring* url) {

187 if (!part.is_valid() \|\| part.len == 0) {

188 // We should always have a path.

189 url->append(L"/");

190 return;

191 }

192

193 // Append the path as is.

194 url->append(text, part.begin, part.len);

195 }

196

197 static inline void FixupQuery(const wstring& text,

198 const url_parse::Component& part,

199 wstring* url) {

200 if (!part.is_valid())

201 return;

202

203 // We don't fix up the query at the moment.

204 url->append(L"?");

205 url->append(text, part.begin, part.len);

206 }

207

208 static inline void FixupRef(const wstring& text,

209 const url_parse::Component& part,

210 wstring* url) {

211 if (!part.is_valid())

212 return;

213

214 // We don't fix up the ref at the moment.

215 url->append(L"#");

216 url->append(text, part.begin, part.len);

217 }

218

219 static void OffsetComponent(int offset, url_parse::Component* part) {

220 DCHECK(part);

221

222 if (part->is_valid()) {

223 // Offset the location of this component.

224 part->begin += offset;

225

226 // This part might not have existed in the original text.

227 if (part->begin < 0)

228 part->reset();

229 }

230 }

231

232 static bool HasPort(const std::wstring& original_text,

233 const url_parse::Component& scheme_component,

234 const std::wstring& scheme) {

235 // Find the range between the ":" and the "/".

236 size_t port_start = scheme_component.end() + 1;

237 size_t port_end = port_start;

238 while ((port_end < original_text.length()) &&

239 !url_parse::IsAuthorityTerminator(original_text[port_end]))

240 ++port_end;

241 if (port_end == port_start)

242 return false;

243

244 // Scan the range to see if it is entirely digits.

245 for (size_t i = port_start; i < port_end; ++i) {

246 if (!IsAsciiDigit(original_text[i]))

247 return false;

248 }

249

250 return true;

251 }

252

253 wstring URLFixerUpper::SegmentURL(const wstring& text,

254 url_parse::Parsed* parts) {

255 // Initialize the result.

256 *parts = url_parse::Parsed();

257

258 wstring trimmed;

259 TrimWhitespace(text, TRIM_ALL, &trimmed);

260 if (trimmed.empty())

261 return wstring(); // Nothing to segment.

262

263 int trimmed_length = static_cast<int>(trimmed.length());

264 if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length)

265 \|\| url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false))

266 return L"file";

267

268 // Otherwise, we need to look at things carefully.

269 wstring scheme;

270 if (url_parse::ExtractScheme(text.data(),

271 static_cast<int>(text.length()),

272 &parts->scheme)) {

273 // We were able to extract a scheme. Remember what we have, but we may

274 // decide to change our minds later.

275 scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len));

276

277 if (parts->scheme.is_valid() &&

278 // Valid schemes are ASCII-only.

279 (!IsStringASCII(scheme) \|\|

280 // We need to fix up the segmentation for "www.example.com:/". For this

281 // case, we guess that schemes with a "." are not actually schemes.

282 (scheme.find(L".") != wstring::npos) \|\|

283 // We need to fix up the segmentation for "www:123/". For this case, we

284 // will add an HTTP scheme later and make the URL parser happy.

285 // TODO(pkasting): Maybe we should try to use GURL's parser for this?

286 HasPort(text, parts->scheme, scheme)))

287 parts->scheme.reset();

288 }

289

290 // When we couldn't find a scheme in the input, we need to pick one. Normally

291 // we choose http, but if the URL starts with "ftp.", we match other browsers

292 // and choose ftp.

293 if (!parts->scheme.is_valid())

294 scheme.assign(StartsWith(text, L"ftp.", false) ? L"ftp" : L"http");

295

296 // Cannonicalize the scheme.

297 StringToLowerASCII(&scheme);

298

299 // Not segmenting file schemes or nonstandard schemes.

300 if ((scheme == L"file") \|\|

301 !url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()),

302 url_parse::Component(0, static_cast<int>(scheme.length()))))

303 return scheme;

304

305 if (parts->scheme.is_valid()) {

306 // Have the GURL parser do the heavy lifting for us.

307 url_parse::ParseStandardURL(text.data(), static_cast<int>(text.length()),

308 parts);

309 return scheme;

310 }

311

312 // We need to add a scheme in order for ParseStandardURL to be happy.

313 // Find the first non-whitespace character.

314 wstring::const_iterator first_nonwhite = text.begin();

315 while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite))

316 ++first_nonwhite;

317

318 // Construct the text to parse by inserting the scheme.

319 wstring inserted_text(scheme);

320 inserted_text.append(L"://");

321 wstring text_to_parse(text.begin(), first_nonwhite);

322 text_to_parse.append(inserted_text);

323 text_to_parse.append(first_nonwhite, text.end());

324

325 // Have the GURL parser do the heavy lifting for us.

326 url_parse::ParseStandardURL(text_to_parse.data(),

327 static_cast<int>(text_to_parse.length()),

328 parts);

329

330 // Offset the results of the parse to match the original text.

331 const int offset = -static_cast<int>(inserted_text.length());

332 OffsetComponent(offset, &parts->scheme);

333 OffsetComponent(offset, &parts->username);

334 OffsetComponent(offset, &parts->password);

335 OffsetComponent(offset, &parts->host);

336 OffsetComponent(offset, &parts->port);

337 OffsetComponent(offset, &parts->path);

338 OffsetComponent(offset, &parts->query);

339 OffsetComponent(offset, &parts->ref);

340

341 return scheme;

342 }

343

344 std::wstring URLFixerUpper::FixupURL(const wstring& text,

345 const wstring& desired_tld) {

346 wstring trimmed;

347 TrimWhitespace(text, TRIM_ALL, &trimmed);

348 if (trimmed.empty())

349 return wstring(); // Nothing here.

350

351 // Segment the URL.

352 url_parse::Parsed parts;

353 wstring scheme(SegmentURL(trimmed, &parts));

354

355 // We handle the file scheme separately.

356 if (scheme == L"file")

357 return (parts.scheme.is_valid() ? text : FixupPath(text));

358

359 // For some schemes whose layouts we understand, we rebuild it.

360 if (url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()),

361 url_parse::Component(0, static_cast<int>(scheme.length())))) {

362 wstring url(scheme);

363 url.append(L"://");

364

365 // We need to check whether the \|username\| is valid because it is our

366 // responsibility to append the '@' to delineate the user information from

367 // the host portion of the URL.

368 if (parts.username.is_valid()) {

369 FixupUsername(trimmed, parts.username, &url);

370 FixupPassword(trimmed, parts.password, &url);

371 url.append(L"@");

372 }

373

374 FixupHost(trimmed, parts.host, parts.scheme.is_valid(), desired_tld, &url);

375 FixupPort(trimmed, parts.port, &url);

376 FixupPath(trimmed, parts.path, &url);

377 FixupQuery(trimmed, parts.query, &url);

378 FixupRef(trimmed, parts.ref, &url);

379

380 return url;

381 }

382

383 // In the worst-case, we insert a scheme if the URL lacks one.

384 if (!parts.scheme.is_valid()) {

385 wstring fixed_scheme(scheme);

386 fixed_scheme.append(L"://");

387 trimmed.insert(0, fixed_scheme);

388 }

389

390 return trimmed;

391 }

392

393 // The rules are different here than for regular fixup, since we need to handle

394 // input like "hello.html" and know to look in the current directory. Regular

395 // fixup will look for cues that it is actually a file path before trying to

396 // figure out what file it is. If our logic doesn't work, we will fall back on

397 // regular fixup.

398 wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir,

399 const wstring& text) {

400 wchar_t old_cur_directory[MAX_PATH];

401 if (!base_dir.empty()) {

402 // save the old current directory before we move to the new one

403 // TODO: in the future, we may want to handle paths longer than MAX_PATH

404 GetCurrentDirectory(MAX_PATH, old_cur_directory);

405 SetCurrentDirectory(base_dir.c_str());

406 }

407

408 // allow funny input with extra whitespace and the wrong kind of slashes

409 wstring trimmed;

410 PrepareStringForFileOps(text, &trimmed);

411

412 bool is_file = true;

413 wstring full_path;

414 if (!ValidPathForFile(trimmed, &full_path)) {

415 // Not a path as entered, try unescaping it in case the user has

416 // escaped things. We need to go through 8-bit since the escaped values

417 // only represent 8-bit values.

418 std::wstring unescaped = UTF8ToWide(UnescapeURLComponent(

419 WideToUTF8(trimmed),

420 UnescapeRule::SPACES \| UnescapeRule::URL_SPECIAL_CHARS));

421 if (!ValidPathForFile(unescaped, &full_path))

422 is_file = false;

423 }

424

425 // Put back the current directory if we saved it.

426 if (!base_dir.empty())

427 SetCurrentDirectory(old_cur_directory);

428

429 if (is_file) {

430 GURL file_url = net::FilePathToFileURL(full_path);

431 if (file_url.is_valid())

432 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring());

433 // Invalid files fall through to regular processing.

434 }

435

436 // Fall back on regular fixup for this input.

437 return FixupURL(text, L"");

438 }

439

OLD	NEW

« no previous file with comments | « chrome/browser/url_fixer_upper.h ('k') | chrome/browser/url_fixer_upper_unittest.cc » ('j') | no next file with comments »