OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <algorithm> | 5 #include <algorithm> |
6 #include <windows.h> | |
7 | 6 |
8 #include "chrome/browser/url_fixer_upper.h" | 7 #include "chrome/browser/url_fixer_upper.h" |
9 | 8 |
10 #include "base/file_util.h" | 9 #include "base/file_util.h" |
11 #include "base/logging.h" | 10 #include "base/logging.h" |
12 #include "base/string_util.h" | 11 #include "base/string_util.h" |
13 #include "chrome/common/gfx/text_elider.h" | 12 #include "chrome/common/gfx/text_elider.h" |
14 #include "googleurl/src/gurl.h" | 13 #include "googleurl/src/gurl.h" |
15 #include "googleurl/src/url_canon.h" | 14 #include "googleurl/src/url_canon.h" |
16 #include "googleurl/src/url_file.h" | 15 #include "googleurl/src/url_file.h" |
17 #include "googleurl/src/url_parse.h" | 16 #include "googleurl/src/url_parse.h" |
18 #include "googleurl/src/url_util.h" | 17 #include "googleurl/src/url_util.h" |
19 #include "net/base/escape.h" | 18 #include "net/base/escape.h" |
20 #include "net/base/net_util.h" | 19 #include "net/base/net_util.h" |
21 #include "net/base/registry_controlled_domain.h" | 20 #include "net/base/registry_controlled_domain.h" |
22 | 21 |
23 using namespace std; | 22 using namespace std; |
24 | 23 |
25 // does some basic fixes for input that we want to test for file-ness | 24 // does some basic fixes for input that we want to test for file-ness |
26 static void PrepareStringForFileOps(const wstring& text, wstring* output) { | 25 static void PrepareStringForFileOps(const wstring& text, wstring* output) { |
27 TrimWhitespace(text, TRIM_ALL, output); | 26 TrimWhitespace(text, TRIM_ALL, output); |
28 replace(output->begin(), output->end(), '/', '\\'); | 27 replace(output->begin(), output->end(), '/', '\\'); |
29 } | 28 } |
30 | 29 |
31 // Tries to create a full path from |text|. If the result is valid and the | 30 // Tries to create a full path from |text|. If the result is valid and the |
32 // file exists, returns true and sets |full_path| to the result. Otherwise, | 31 // file exists, returns true and sets |full_path| to the result. Otherwise, |
33 // returns false and leaves |full_path| unchanged. | 32 // returns false and leaves |full_path| unchanged. |
34 static bool ValidPathForFile(const wstring& text, wstring* full_path) { | 33 static bool ValidPathForFile(const wstring& text, wstring* full_path) { |
35 wchar_t file_path[MAX_PATH]; | 34 wstring file_path(text); |
36 if (!_wfullpath(file_path, text.c_str(), MAX_PATH)) | 35 if (!file_util::AbsolutePath(&file_path)) |
37 return false; | 36 return false; |
38 | 37 |
39 if (!file_util::PathExists(file_path)) | 38 if (!file_util::PathExists(file_path)) |
40 return false; | 39 return false; |
41 | 40 |
42 full_path->assign(file_path); | 41 full_path->assign(file_path); |
43 return true; | 42 return true; |
44 } | 43 } |
45 | 44 |
46 // Tries to create a file: URL from |text| if it looks like a filename, even if | 45 // Tries to create a file: URL from |text| if it looks like a filename, even if |
(...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
248 } | 247 } |
249 | 248 |
250 return true; | 249 return true; |
251 } | 250 } |
252 | 251 |
253 wstring URLFixerUpper::SegmentURL(const wstring& text, | 252 wstring URLFixerUpper::SegmentURL(const wstring& text, |
254 url_parse::Parsed* parts) { | 253 url_parse::Parsed* parts) { |
255 // Initialize the result. | 254 // Initialize the result. |
256 *parts = url_parse::Parsed(); | 255 *parts = url_parse::Parsed(); |
257 | 256 |
| 257 #if defined(OS_WIN) |
258 wstring trimmed; | 258 wstring trimmed; |
259 TrimWhitespace(text, TRIM_ALL, &trimmed); | 259 TrimWhitespace(text, TRIM_ALL, &trimmed); |
260 if (trimmed.empty()) | 260 if (trimmed.empty()) |
261 return wstring(); // Nothing to segment. | 261 return wstring(); // Nothing to segment. |
262 | 262 |
263 int trimmed_length = static_cast<int>(trimmed.length()); | 263 int trimmed_length = static_cast<int>(trimmed.length()); |
264 if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) | 264 if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) |
265 || url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false)) | 265 || url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false)) |
266 return L"file"; | 266 return L"file"; |
| 267 #endif |
267 | 268 |
268 // Otherwise, we need to look at things carefully. | 269 // Otherwise, we need to look at things carefully. |
269 wstring scheme; | 270 wstring scheme; |
270 if (url_parse::ExtractScheme(text.data(), | 271 string text_utf8 = WideToUTF8(text); |
271 static_cast<int>(text.length()), | 272 if (url_parse::ExtractScheme(text_utf8.c_str(), |
| 273 static_cast<int>(text_utf8.length()), |
272 &parts->scheme)) { | 274 &parts->scheme)) { |
273 // We were able to extract a scheme. Remember what we have, but we may | 275 // We were able to extract a scheme. Remember what we have, but we may |
274 // decide to change our minds later. | 276 // decide to change our minds later. |
275 scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len)); | 277 scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len)); |
276 | 278 |
277 if (parts->scheme.is_valid() && | 279 if (parts->scheme.is_valid() && |
278 // Valid schemes are ASCII-only. | 280 // Valid schemes are ASCII-only. |
279 (!IsStringASCII(scheme) || | 281 (!IsStringASCII(scheme) || |
280 // We need to fix up the segmentation for "www.example.com:/". For this | 282 // We need to fix up the segmentation for "www.example.com:/". For this |
281 // case, we guess that schemes with a "." are not actually schemes. | 283 // case, we guess that schemes with a "." are not actually schemes. |
(...skipping 13 matching lines...) Expand all Loading... |
295 } else { | 297 } else { |
296 // Having been unable to extract a scheme, we default to HTTP. | 298 // Having been unable to extract a scheme, we default to HTTP. |
297 scheme.assign(L"http"); | 299 scheme.assign(L"http"); |
298 scheme_end = 0; | 300 scheme_end = 0; |
299 } | 301 } |
300 | 302 |
301 // Canonicalize the scheme. | 303 // Canonicalize the scheme. |
302 StringToLowerASCII(&scheme); | 304 StringToLowerASCII(&scheme); |
303 | 305 |
304 // Not segmenting file schemes or nonstandard schemes. | 306 // Not segmenting file schemes or nonstandard schemes. |
| 307 string scheme_utf8 = WideToUTF8(scheme); |
305 if ((scheme == L"file") || | 308 if ((scheme == L"file") || |
306 !url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()), | 309 !url_util::IsStandard(scheme_utf8.c_str(), |
307 url_parse::Component(0, static_cast<int>(scheme.length())))) | 310 static_cast<int>(scheme_utf8.length()), |
| 311 url_parse::Component(0, static_cast<int>(scheme_utf8.length())))) |
308 return scheme; | 312 return scheme; |
309 | 313 |
310 if (parts->scheme.is_valid()) { | 314 if (parts->scheme.is_valid()) { |
311 // Have the GURL parser do the heavy lifting for us. | 315 // Have the GURL parser do the heavy lifting for us. |
312 url_parse::ParseStandardURL(text.data(), static_cast<int>(text.length()), | 316 string text_utf8 = WideToUTF8(text); |
| 317 url_parse::ParseStandardURL(text_utf8.c_str(), |
| 318 static_cast<int>(text_utf8.length()), |
313 parts); | 319 parts); |
314 return scheme; | 320 return scheme; |
315 } | 321 } |
316 | 322 |
317 // We need to add a scheme in order for ParseStandardURL to be happy. | 323 // We need to add a scheme in order for ParseStandardURL to be happy. |
318 // Find the first non-whitespace character. | 324 // Find the first non-whitespace character. |
319 wstring::const_iterator first_nonwhite = text.begin(); | 325 wstring::const_iterator first_nonwhite = text.begin(); |
320 while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite)) | 326 while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite)) |
321 ++first_nonwhite; | 327 ++first_nonwhite; |
322 | 328 |
323 // Construct the text to parse by inserting the scheme. | 329 // Construct the text to parse by inserting the scheme. |
324 wstring inserted_text(scheme); | 330 wstring inserted_text(scheme); |
325 inserted_text.append(L"://"); | 331 inserted_text.append(L"://"); |
326 wstring text_to_parse(text.begin(), first_nonwhite); | 332 wstring text_to_parse(text.begin(), first_nonwhite); |
327 text_to_parse.append(inserted_text); | 333 text_to_parse.append(inserted_text); |
328 text_to_parse.append(first_nonwhite, text.end()); | 334 text_to_parse.append(first_nonwhite, text.end()); |
329 | 335 |
330 // Have the GURL parser do the heavy lifting for us. | 336 // Have the GURL parser do the heavy lifting for us. |
331 url_parse::ParseStandardURL(text_to_parse.data(), | 337 string text_to_parse_utf8 = WideToUTF8(text_to_parse); |
332 static_cast<int>(text_to_parse.length()), | 338 url_parse::ParseStandardURL(text_to_parse_utf8.c_str(), |
| 339 static_cast<int>(text_to_parse_utf8.length()), |
333 parts); | 340 parts); |
334 | 341 |
335 // Offset the results of the parse to match the original text. | 342 // Offset the results of the parse to match the original text. |
336 const int offset = -static_cast<int>(inserted_text.length()); | 343 const int offset = -static_cast<int>(inserted_text.length()); |
337 OffsetComponent(offset, &parts->scheme); | 344 OffsetComponent(offset, &parts->scheme); |
338 OffsetComponent(offset, &parts->username); | 345 OffsetComponent(offset, &parts->username); |
339 OffsetComponent(offset, &parts->password); | 346 OffsetComponent(offset, &parts->password); |
340 OffsetComponent(offset, &parts->host); | 347 OffsetComponent(offset, &parts->host); |
341 OffsetComponent(offset, &parts->port); | 348 OffsetComponent(offset, &parts->port); |
342 OffsetComponent(offset, &parts->path); | 349 OffsetComponent(offset, &parts->path); |
(...skipping 12 matching lines...) Expand all Loading... |
355 | 362 |
356 // Segment the URL. | 363 // Segment the URL. |
357 url_parse::Parsed parts; | 364 url_parse::Parsed parts; |
358 wstring scheme(SegmentURL(trimmed, &parts)); | 365 wstring scheme(SegmentURL(trimmed, &parts)); |
359 | 366 |
360 // We handle the file scheme separately. | 367 // We handle the file scheme separately. |
361 if (scheme == L"file") | 368 if (scheme == L"file") |
362 return (parts.scheme.is_valid() ? text : FixupPath(text)); | 369 return (parts.scheme.is_valid() ? text : FixupPath(text)); |
363 | 370 |
364 // For some schemes whose layouts we understand, we rebuild it. | 371 // For some schemes whose layouts we understand, we rebuild it. |
365 if (url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()), | 372 if (url_util::IsStandard( |
366 url_parse::Component(0, static_cast<int>(scheme.length())))) { | 373 WideToUTF8(scheme).c_str(), static_cast<int>(scheme.length()), |
| 374 url_parse::Component(0, static_cast<int>(scheme.length())))) { |
367 wstring url(scheme); | 375 wstring url(scheme); |
368 url.append(L"://"); | 376 url.append(L"://"); |
369 | 377 |
370 // We need to check whether the |username| is valid because it is our | 378 // We need to check whether the |username| is valid because it is our |
371 // responsibility to append the '@' to delineate the user information from | 379 // responsibility to append the '@' to delineate the user information from |
372 // the host portion of the URL. | 380 // the host portion of the URL. |
373 if (parts.username.is_valid()) { | 381 if (parts.username.is_valid()) { |
374 FixupUsername(trimmed, parts.username, &url); | 382 FixupUsername(trimmed, parts.username, &url); |
375 FixupPassword(trimmed, parts.password, &url); | 383 FixupPassword(trimmed, parts.password, &url); |
376 url.append(L"@"); | 384 url.append(L"@"); |
(...skipping 18 matching lines...) Expand all Loading... |
395 return trimmed; | 403 return trimmed; |
396 } | 404 } |
397 | 405 |
398 // The rules are different here than for regular fixup, since we need to handle | 406 // The rules are different here than for regular fixup, since we need to handle |
399 // input like "hello.html" and know to look in the current directory. Regular | 407 // input like "hello.html" and know to look in the current directory. Regular |
400 // fixup will look for cues that it is actually a file path before trying to | 408 // fixup will look for cues that it is actually a file path before trying to |
401 // figure out what file it is. If our logic doesn't work, we will fall back on | 409 // figure out what file it is. If our logic doesn't work, we will fall back on |
402 // regular fixup. | 410 // regular fixup. |
403 wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir, | 411 wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir, |
404 const wstring& text) { | 412 const wstring& text) { |
405 wchar_t old_cur_directory[MAX_PATH]; | 413 wstring old_cur_directory; |
406 if (!base_dir.empty()) { | 414 if (!base_dir.empty()) { |
407 // save the old current directory before we move to the new one | 415 // save the old current directory before we move to the new one |
408 // TODO: in the future, we may want to handle paths longer than MAX_PATH | 416 file_util::GetCurrentDirectory(&old_cur_directory); |
409 GetCurrentDirectory(MAX_PATH, old_cur_directory); | 417 file_util::SetCurrentDirectory(base_dir); |
410 SetCurrentDirectory(base_dir.c_str()); | |
411 } | 418 } |
412 | 419 |
413 // allow funny input with extra whitespace and the wrong kind of slashes | 420 // allow funny input with extra whitespace and the wrong kind of slashes |
414 wstring trimmed; | 421 wstring trimmed; |
415 PrepareStringForFileOps(text, &trimmed); | 422 PrepareStringForFileOps(text, &trimmed); |
416 | 423 |
417 bool is_file = true; | 424 bool is_file = true; |
418 wstring full_path; | 425 wstring full_path; |
419 if (!ValidPathForFile(trimmed, &full_path)) { | 426 if (!ValidPathForFile(trimmed, &full_path)) { |
420 // Not a path as entered, try unescaping it in case the user has | 427 // Not a path as entered, try unescaping it in case the user has |
421 // escaped things. We need to go through 8-bit since the escaped values | 428 // escaped things. We need to go through 8-bit since the escaped values |
422 // only represent 8-bit values. | 429 // only represent 8-bit values. |
423 std::wstring unescaped = UTF8ToWide(UnescapeURLComponent( | 430 std::wstring unescaped = UTF8ToWide(UnescapeURLComponent( |
424 WideToUTF8(trimmed), | 431 WideToUTF8(trimmed), |
425 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)); | 432 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)); |
426 if (!ValidPathForFile(unescaped, &full_path)) | 433 if (!ValidPathForFile(unescaped, &full_path)) |
427 is_file = false; | 434 is_file = false; |
428 } | 435 } |
429 | 436 |
430 // Put back the current directory if we saved it. | 437 // Put back the current directory if we saved it. |
431 if (!base_dir.empty()) | 438 if (!base_dir.empty()) |
432 SetCurrentDirectory(old_cur_directory); | 439 file_util::SetCurrentDirectory(old_cur_directory); |
433 | 440 |
434 if (is_file) { | 441 if (is_file) { |
435 GURL file_url = net::FilePathToFileURL(full_path); | 442 GURL file_url = net::FilePathToFileURL(full_path); |
436 if (file_url.is_valid()) | 443 if (file_url.is_valid()) |
437 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring()); | 444 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring()); |
438 // Invalid files fall through to regular processing. | 445 // Invalid files fall through to regular processing. |
439 } | 446 } |
440 | 447 |
441 // Fall back on regular fixup for this input. | 448 // Fall back on regular fixup for this input. |
442 return FixupURL(text, L""); | 449 return FixupURL(text, L""); |
443 } | 450 } |
444 | 451 |
OLD | NEW |