OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <algorithm> | 5 #include <algorithm> |
| 6 #include <windows.h> |
6 | 7 |
7 #include "chrome/browser/url_fixer_upper.h" | 8 #include "chrome/browser/url_fixer_upper.h" |
8 | 9 |
9 #include "base/file_util.h" | 10 #include "base/file_util.h" |
10 #include "base/logging.h" | 11 #include "base/logging.h" |
11 #include "base/string_util.h" | 12 #include "base/string_util.h" |
12 #include "chrome/common/gfx/text_elider.h" | 13 #include "chrome/common/gfx/text_elider.h" |
13 #include "googleurl/src/gurl.h" | 14 #include "googleurl/src/gurl.h" |
14 #include "googleurl/src/url_canon.h" | 15 #include "googleurl/src/url_canon.h" |
15 #include "googleurl/src/url_file.h" | 16 #include "googleurl/src/url_file.h" |
16 #include "googleurl/src/url_parse.h" | 17 #include "googleurl/src/url_parse.h" |
17 #include "googleurl/src/url_util.h" | 18 #include "googleurl/src/url_util.h" |
18 #include "net/base/escape.h" | 19 #include "net/base/escape.h" |
19 #include "net/base/net_util.h" | 20 #include "net/base/net_util.h" |
20 #include "net/base/registry_controlled_domain.h" | 21 #include "net/base/registry_controlled_domain.h" |
21 | 22 |
22 using namespace std; | 23 using namespace std; |
23 | 24 |
24 // Does some basic fixes for input that we want to test for file-ness. | 25 // Does some basic fixes for input that we want to test for file-ness. |
// Review note: these rows are a side-by-side diff (OLD | NEW); both columns are
// identical for this function.
//
// Normalizes |text| into |output| before it is tested as a file path:
//   1. TrimWhitespace is called with TRIM_ALL, writing its result to |output|.
//   2. Every '/' character in |output| is replaced with a backslash.
// |text| is not modified; the function returns nothing and reports no errors.
25 static void PrepareStringForFileOps(const wstring& text, wstring* output) { | 26 static void PrepareStringForFileOps(const wstring& text, wstring* output) { |
26 TrimWhitespace(text, TRIM_ALL, output); | 27 TrimWhitespace(text, TRIM_ALL, output); |
// Forward slashes become backslashes so the later path checks see one
// separator style.
27 replace(output->begin(), output->end(), '/', '\\'); | 28 replace(output->begin(), output->end(), '/', '\\'); |
28 } | 29 } |
29 | 30 |
30 // Tries to create a full path from |text|. If the result is valid and the | 31 // Tries to create a full path from |text|. If the result is valid and the |
31 // file exists, returns true and sets |full_path| to the result. Otherwise, | 32 // file exists, returns true and sets |full_path| to the result. Otherwise, |
32 // returns false and leaves |full_path| unchanged. | 33 // returns false and leaves |full_path| unchanged. |
// Review note: these rows are a side-by-side diff (OLD | NEW). The two columns
// differ only in how the absolute path is computed:
//   OLD: copies |text| into a wstring and calls file_util::AbsolutePath on it.
//   NEW: calls _wfullpath to write into a fixed wchar_t[MAX_PATH] stack buffer.
//
// Returns false without touching |full_path| when the absolute-path step
// reports failure or when file_util::PathExists reports that the path does
// not exist. Otherwise assigns the absolute path to |full_path| and returns
// true.
33 static bool ValidPathForFile(const wstring& text, wstring* full_path) { | 34 static bool ValidPathForFile(const wstring& text, wstring* full_path) { |
34 wstring file_path(text); | 35 wchar_t file_path[MAX_PATH]; |
// NOTE(review): in the NEW column, _wfullpath presumably returns NULL both on
// error and when the result does not fit in MAX_PATH characters, so longer
// paths would be rejected here. Confirm against the CRT documentation.
35 if (!file_util::AbsolutePath(&file_path)) | 36 if (!_wfullpath(file_path, text.c_str(), MAX_PATH)) |
36 return false; | 37 return false; |
37 | 38 |
38 if (!file_util::PathExists(file_path)) | 39 if (!file_util::PathExists(file_path)) |
39 return false; | 40 return false; |
40 | 41 |
// Only written on success, so callers can rely on |full_path| being unchanged
// when false is returned.
41 full_path->assign(file_path); | 42 full_path->assign(file_path); |
42 return true; | 43 return true; |
43 } | 44 } |
44 | 45 |
45 // Tries to create a file: URL from |text| if it looks like a filename, even if | 46 // Tries to create a file: URL from |text| if it looks like a filename, even if |
(...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
247 } | 248 } |
248 | 249 |
249 return true; | 250 return true; |
250 } | 251 } |
251 | 252 |
252 wstring URLFixerUpper::SegmentURL(const wstring& text, | 253 wstring URLFixerUpper::SegmentURL(const wstring& text, |
253 url_parse::Parsed* parts) { | 254 url_parse::Parsed* parts) { |
254 // Initialize the result. | 255 // Initialize the result. |
255 *parts = url_parse::Parsed(); | 256 *parts = url_parse::Parsed(); |
256 | 257 |
257 #if defined(OS_WIN) | |
258 wstring trimmed; | 258 wstring trimmed; |
259 TrimWhitespace(text, TRIM_ALL, &trimmed); | 259 TrimWhitespace(text, TRIM_ALL, &trimmed); |
260 if (trimmed.empty()) | 260 if (trimmed.empty()) |
261 return wstring(); // Nothing to segment. | 261 return wstring(); // Nothing to segment. |
262 | 262 |
263 int trimmed_length = static_cast<int>(trimmed.length()); | 263 int trimmed_length = static_cast<int>(trimmed.length()); |
264 if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) | 264 if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) |
265 || url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false)) | 265 || url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false)) |
266 return L"file"; | 266 return L"file"; |
267 #endif | |
268 | 267 |
269 // Otherwise, we need to look at things carefully. | 268 // Otherwise, we need to look at things carefully. |
270 wstring scheme; | 269 wstring scheme; |
271 string text_utf8 = WideToUTF8(text); | 270 if (url_parse::ExtractScheme(text.data(), |
272 if (url_parse::ExtractScheme(text_utf8.c_str(), | 271 static_cast<int>(text.length()), |
273 static_cast<int>(text_utf8.length()), | |
274 &parts->scheme)) { | 272 &parts->scheme)) { |
275 // We were able to extract a scheme. Remember what we have, but we may | 273 // We were able to extract a scheme. Remember what we have, but we may |
276 // decide to change our minds later. | 274 // decide to change our minds later. |
277 scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len)); | 275 scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len)); |
278 | 276 |
279 if (parts->scheme.is_valid() && | 277 if (parts->scheme.is_valid() && |
280 // Valid schemes are ASCII-only. | 278 // Valid schemes are ASCII-only. |
281 (!IsStringASCII(scheme) || | 279 (!IsStringASCII(scheme) || |
282 // We need to fix up the segmentation for "www.example.com:/". For this | 280 // We need to fix up the segmentation for "www.example.com:/". For this |
283 // case, we guess that schemes with a "." are not actually schemes. | 281 // case, we guess that schemes with a "." are not actually schemes. |
(...skipping 13 matching lines...) Expand all Loading... |
297 } else { | 295 } else { |
298 // Having been unable to extract a scheme, we default to HTTP. | 296 // Having been unable to extract a scheme, we default to HTTP. |
299 scheme.assign(L"http"); | 297 scheme.assign(L"http"); |
300 scheme_end = 0; | 298 scheme_end = 0; |
301 } | 299 } |
302 | 300 |
303 // Canonicalize the scheme. | 301 // Canonicalize the scheme. |
304 StringToLowerASCII(&scheme); | 302 StringToLowerASCII(&scheme); |
305 | 303 |
306 // Not segmenting file schemes or nonstandard schemes. | 304 // Not segmenting file schemes or nonstandard schemes. |
307 string scheme_utf8 = WideToUTF8(scheme); | |
308 if ((scheme == L"file") || | 305 if ((scheme == L"file") || |
309 !url_util::IsStandard(scheme_utf8.c_str(), | 306 !url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()), |
310 static_cast<int>(scheme_utf8.length()), | 307 url_parse::Component(0, static_cast<int>(scheme.length())))) |
311 url_parse::Component(0, static_cast<int>(scheme_utf8.length())))) | |
312 return scheme; | 308 return scheme; |
313 | 309 |
314 if (parts->scheme.is_valid()) { | 310 if (parts->scheme.is_valid()) { |
315 // Have the GURL parser do the heavy lifting for us. | 311 // Have the GURL parser do the heavy lifting for us. |
316 string text_utf8 = WideToUTF8(text); | 312 url_parse::ParseStandardURL(text.data(), static_cast<int>(text.length()), |
317 url_parse::ParseStandardURL(text_utf8.c_str(), | |
318 static_cast<int>(text_utf8.length()), | |
319 parts); | 313 parts); |
320 return scheme; | 314 return scheme; |
321 } | 315 } |
322 | 316 |
323 // We need to add a scheme in order for ParseStandardURL to be happy. | 317 // We need to add a scheme in order for ParseStandardURL to be happy. |
324 // Find the first non-whitespace character. | 318 // Find the first non-whitespace character. |
325 wstring::const_iterator first_nonwhite = text.begin(); | 319 wstring::const_iterator first_nonwhite = text.begin(); |
326 while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite)) | 320 while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite)) |
327 ++first_nonwhite; | 321 ++first_nonwhite; |
328 | 322 |
329 // Construct the text to parse by inserting the scheme. | 323 // Construct the text to parse by inserting the scheme. |
330 wstring inserted_text(scheme); | 324 wstring inserted_text(scheme); |
331 inserted_text.append(L"://"); | 325 inserted_text.append(L"://"); |
332 wstring text_to_parse(text.begin(), first_nonwhite); | 326 wstring text_to_parse(text.begin(), first_nonwhite); |
333 text_to_parse.append(inserted_text); | 327 text_to_parse.append(inserted_text); |
334 text_to_parse.append(first_nonwhite, text.end()); | 328 text_to_parse.append(first_nonwhite, text.end()); |
335 | 329 |
336 // Have the GURL parser do the heavy lifting for us. | 330 // Have the GURL parser do the heavy lifting for us. |
337 string text_to_parse_utf8 = WideToUTF8(text_to_parse); | 331 url_parse::ParseStandardURL(text_to_parse.data(), |
338 url_parse::ParseStandardURL(text_to_parse_utf8.c_str(), | 332 static_cast<int>(text_to_parse.length()), |
339 static_cast<int>(text_to_parse_utf8.length()), | |
340 parts); | 333 parts); |
341 | 334 |
342 // Offset the results of the parse to match the original text. | 335 // Offset the results of the parse to match the original text. |
343 const int offset = -static_cast<int>(inserted_text.length()); | 336 const int offset = -static_cast<int>(inserted_text.length()); |
344 OffsetComponent(offset, &parts->scheme); | 337 OffsetComponent(offset, &parts->scheme); |
345 OffsetComponent(offset, &parts->username); | 338 OffsetComponent(offset, &parts->username); |
346 OffsetComponent(offset, &parts->password); | 339 OffsetComponent(offset, &parts->password); |
347 OffsetComponent(offset, &parts->host); | 340 OffsetComponent(offset, &parts->host); |
348 OffsetComponent(offset, &parts->port); | 341 OffsetComponent(offset, &parts->port); |
349 OffsetComponent(offset, &parts->path); | 342 OffsetComponent(offset, &parts->path); |
(...skipping 12 matching lines...) Expand all Loading... |
362 | 355 |
363 // Segment the URL. | 356 // Segment the URL. |
364 url_parse::Parsed parts; | 357 url_parse::Parsed parts; |
365 wstring scheme(SegmentURL(trimmed, &parts)); | 358 wstring scheme(SegmentURL(trimmed, &parts)); |
366 | 359 |
367 // We handle the file scheme separately. | 360 // We handle the file scheme separately. |
368 if (scheme == L"file") | 361 if (scheme == L"file") |
369 return (parts.scheme.is_valid() ? text : FixupPath(text)); | 362 return (parts.scheme.is_valid() ? text : FixupPath(text)); |
370 | 363 |
371 // For some schemes whose layouts we understand, we rebuild it. | 364 // For some schemes whose layouts we understand, we rebuild it. |
372 if (url_util::IsStandard( | 365 if (url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()), |
373 WideToUTF8(scheme).c_str(), static_cast<int>(scheme.length()), | 366 url_parse::Component(0, static_cast<int>(scheme.length())))) { |
374 url_parse::Component(0, static_cast<int>(scheme.length())))) { | |
375 wstring url(scheme); | 367 wstring url(scheme); |
376 url.append(L"://"); | 368 url.append(L"://"); |
377 | 369 |
378 // We need to check whether the |username| is valid because it is our | 370 // We need to check whether the |username| is valid because it is our |
379 // responsibility to append the '@' to delineate the user information from | 371 // responsibility to append the '@' to delineate the user information from |
380 // the host portion of the URL. | 372 // the host portion of the URL. |
381 if (parts.username.is_valid()) { | 373 if (parts.username.is_valid()) { |
382 FixupUsername(trimmed, parts.username, &url); | 374 FixupUsername(trimmed, parts.username, &url); |
383 FixupPassword(trimmed, parts.password, &url); | 375 FixupPassword(trimmed, parts.password, &url); |
384 url.append(L"@"); | 376 url.append(L"@"); |
(...skipping 18 matching lines...) Expand all Loading... |
403 return trimmed; | 395 return trimmed; |
404 } | 396 } |
405 | 397 |
406 // The rules are different here than for regular fixup, since we need to handle | 398 // The rules are different here than for regular fixup, since we need to handle |
407 // input like "hello.html" and know to look in the current directory. Regular | 399 // input like "hello.html" and know to look in the current directory. Regular |
408 // fixup will look for cues that it is actually a file path before trying to | 400 // fixup will look for cues that it is actually a file path before trying to |
409 // figure out what file it is. If our logic doesn't work, we will fall back on | 401 // figure out what file it is. If our logic doesn't work, we will fall back on |
410 // regular fixup. | 402 // regular fixup. |
// Review note: these rows are a side-by-side diff (OLD | NEW). OLD uses the
// file_util::GetCurrentDirectory / file_util::SetCurrentDirectory wrappers
// with a wstring; NEW calls GetCurrentDirectory / SetCurrentDirectory directly
// with a wchar_t[MAX_PATH] buffer.
//
// Tries to interpret |text| as a file path, relative to |base_dir| when
// |base_dir| is non-empty:
//   1. If |base_dir| is non-empty, saves the current directory and switches
//      to |base_dir|.
//   2. Normalizes |text| with PrepareStringForFileOps and checks it with
//      ValidPathForFile; on failure, retries with an unescaped copy.
//   3. If |base_dir| is non-empty, switches back to the saved directory.
//   4. If a file was found and net::FilePathToFileURL yields a valid GURL,
//      returns the result of gfx::ElideUrl for that URL.
//   5. Otherwise returns FixupURL(text, L"").
411 wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir, | 403 wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir, |
412 const wstring& text) { | 404 const wstring& text) { |
413 wstring old_cur_directory; | 405 wchar_t old_cur_directory[MAX_PATH]; |
414 if (!base_dir.empty()) { | 406 if (!base_dir.empty()) { |
415 // save the old current directory before we move to the new one | 407 // save the old current directory before we move to the new one |
// NOTE(review): neither column checks the result of saving or changing the
// directory. In the NEW column |old_cur_directory| is declared without an
// initializer, so if GetCurrentDirectory does not fill it (presumably on
// failure or when the path exceeds MAX_PATH; confirm the Win32 contract) the
// restore call below would read an uninitialized buffer.
// NOTE(review): the current directory is presumably process-wide state, so
// this temporary switch may be visible to other threads. TODO confirm.
416 file_util::GetCurrentDirectory(&old_cur_directory); | 408 // TODO: in the future, we may want to handle paths longer than MAX_PATH |
417 file_util::SetCurrentDirectory(base_dir); | 409 GetCurrentDirectory(MAX_PATH, old_cur_directory); |
| 410 SetCurrentDirectory(base_dir.c_str()); |
418 } | 411 } |
419 | 412 |
420 // allow funny input with extra whitespace and the wrong kind of slashes | 413 // allow funny input with extra whitespace and the wrong kind of slashes |
421 wstring trimmed; | 414 wstring trimmed; |
422 PrepareStringForFileOps(text, &trimmed); | 415 PrepareStringForFileOps(text, &trimmed); |
423 | 416 |
// |is_file| starts true and is cleared only when both the raw and the
// unescaped attempts fail.
424 bool is_file = true; | 417 bool is_file = true; |
425 wstring full_path; | 418 wstring full_path; |
426 if (!ValidPathForFile(trimmed, &full_path)) { | 419 if (!ValidPathForFile(trimmed, &full_path)) { |
427 // Not a path as entered, try unescaping it in case the user has | 420 // Not a path as entered, try unescaping it in case the user has |
428 // escaped things. We need to go through 8-bit since the escaped values | 421 // escaped things. We need to go through 8-bit since the escaped values |
429 // only represent 8-bit values. | 422 // only represent 8-bit values. |
430 std::wstring unescaped = UTF8ToWide(UnescapeURLComponent( | 423 std::wstring unescaped = UTF8ToWide(UnescapeURLComponent( |
431 WideToUTF8(trimmed), | 424 WideToUTF8(trimmed), |
432 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)); | 425 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)); |
433 if (!ValidPathForFile(unescaped, &full_path)) | 426 if (!ValidPathForFile(unescaped, &full_path)) |
434 is_file = false; | 427 is_file = false; |
435 } | 428 } |
436 | 429 |
437 // Put back the current directory if we saved it. | 430 // Put back the current directory if we saved it. |
// The restore is keyed on |base_dir| being non-empty, not on whether the save
// above succeeded.
438 if (!base_dir.empty()) | 431 if (!base_dir.empty()) |
439 file_util::SetCurrentDirectory(old_cur_directory); | 432 SetCurrentDirectory(old_cur_directory); |
440 | 433 |
441 if (is_file) { | 434 if (is_file) { |
442 GURL file_url = net::FilePathToFileURL(full_path); | 435 GURL file_url = net::FilePathToFileURL(full_path); |
443 if (file_url.is_valid()) | 436 if (file_url.is_valid()) |
444 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring()); | 437 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring()); |
445 // Invalid files fall through to regular processing. | 438 // Invalid files fall through to regular processing. |
446 } | 439 } |
447 | 440 |
448 // Fall back on regular fixup for this input. | 441 // Fall back on regular fixup for this input. |
// The original |text| is passed here, not the trimmed/unescaped copy.
449 return FixupURL(text, L""); | 442 return FixupURL(text, L""); |
450 } | 443 } |
451 | 444 |
OLD | NEW |