| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <algorithm> | 5 #include <algorithm> |
| 6 #include <windows.h> | |
| 7 | 6 |
| 8 #include "chrome/browser/url_fixer_upper.h" | 7 #include "chrome/browser/url_fixer_upper.h" |
| 9 | 8 |
| 10 #include "base/file_util.h" | 9 #include "base/file_util.h" |
| 11 #include "base/logging.h" | 10 #include "base/logging.h" |
| 12 #include "base/string_util.h" | 11 #include "base/string_util.h" |
| 13 #include "chrome/common/gfx/text_elider.h" | 12 #include "chrome/common/gfx/text_elider.h" |
| 14 #include "googleurl/src/gurl.h" | 13 #include "googleurl/src/gurl.h" |
| 15 #include "googleurl/src/url_canon.h" | 14 #include "googleurl/src/url_canon.h" |
| 16 #include "googleurl/src/url_file.h" | 15 #include "googleurl/src/url_file.h" |
| 17 #include "googleurl/src/url_parse.h" | 16 #include "googleurl/src/url_parse.h" |
| 18 #include "googleurl/src/url_util.h" | 17 #include "googleurl/src/url_util.h" |
| 19 #include "net/base/escape.h" | 18 #include "net/base/escape.h" |
| 20 #include "net/base/net_util.h" | 19 #include "net/base/net_util.h" |
| 21 #include "net/base/registry_controlled_domain.h" | 20 #include "net/base/registry_controlled_domain.h" |
| 22 | 21 |
| 23 using namespace std; | 22 using namespace std; |
| 24 | 23 |
| 25 // does some basic fixes for input that we want to test for file-ness | 24 // Does some basic fixes for input that we want to test for file-ness: trims whitespace and replaces '/' with '\'. |
| 26 static void PrepareStringForFileOps(const wstring& text, wstring* output) { | 25 static void PrepareStringForFileOps(const wstring& text, wstring* output) { |
| 27 TrimWhitespace(text, TRIM_ALL, output); | 26 TrimWhitespace(text, TRIM_ALL, output); |
| 28 replace(output->begin(), output->end(), '/', '\\'); | 27 replace(output->begin(), output->end(), '/', '\\'); |
| 29 } | 28 } |
| 30 | 29 |
| 31 // Tries to create a full path from |text|. If the result is valid and the | 30 // Tries to create a full path from |text|. If the result is valid and the |
| 32 // file exists, returns true and sets |full_path| to the result. Otherwise, | 31 // file exists, returns true and sets |full_path| to the result. Otherwise, |
| 33 // returns false and leaves |full_path| unchanged. | 32 // returns false and leaves |full_path| unchanged. |
| 34 static bool ValidPathForFile(const wstring& text, wstring* full_path) { | 33 static bool ValidPathForFile(const wstring& text, wstring* full_path) { |
| 35 wchar_t file_path[MAX_PATH]; | 34 wstring file_path(text); |
| 36 if (!_wfullpath(file_path, text.c_str(), MAX_PATH)) | 35 if (!file_util::AbsolutePath(&file_path)) |
| 37 return false; | 36 return false; |
| 38 | 37 |
| 39 if (!file_util::PathExists(file_path)) | 38 if (!file_util::PathExists(file_path)) |
| 40 return false; | 39 return false; |
| 41 | 40 |
| 42 full_path->assign(file_path); | 41 full_path->assign(file_path); |
| 43 return true; | 42 return true; |
| 44 } | 43 } |
| 45 | 44 |
| 46 // Tries to create a file: URL from |text| if it looks like a filename, even if | 45 // Tries to create a file: URL from |text| if it looks like a filename, even if |
| (...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 248 } | 247 } |
| 249 | 248 |
| 250 return true; | 249 return true; |
| 251 } | 250 } |
| 252 | 251 |
| 253 wstring URLFixerUpper::SegmentURL(const wstring& text, | 252 wstring URLFixerUpper::SegmentURL(const wstring& text, |
| 254 url_parse::Parsed* parts) { | 253 url_parse::Parsed* parts) { |
| 255 // Initialize the result. | 254 // Initialize the result. |
| 256 *parts = url_parse::Parsed(); | 255 *parts = url_parse::Parsed(); |
| 257 | 256 |
| 257 #if defined(OS_WIN) |
| 258 wstring trimmed; | 258 wstring trimmed; |
| 259 TrimWhitespace(text, TRIM_ALL, &trimmed); | 259 TrimWhitespace(text, TRIM_ALL, &trimmed); |
| 260 if (trimmed.empty()) | 260 if (trimmed.empty()) |
| 261 return wstring(); // Nothing to segment. | 261 return wstring(); // Nothing to segment. |
| 262 | 262 |
| 263 int trimmed_length = static_cast<int>(trimmed.length()); | 263 int trimmed_length = static_cast<int>(trimmed.length()); |
| 264 if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) | 264 if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) |
| 265 || url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false)) | 265 || url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false)) |
| 266 return L"file"; | 266 return L"file"; |
| 267 #endif |
| 267 | 268 |
| 268 // Otherwise, we need to look at things carefully. | 269 // Otherwise, we need to look at things carefully. |
| 269 wstring scheme; | 270 wstring scheme; |
| 270 if (url_parse::ExtractScheme(text.data(), | 271 string text_utf8 = WideToUTF8(text); |
| 271 static_cast<int>(text.length()), | 272 if (url_parse::ExtractScheme(text_utf8.c_str(), |
| 273 static_cast<int>(text_utf8.length()), |
| 272 &parts->scheme)) { | 274 &parts->scheme)) { |
| 273 // We were able to extract a scheme. Remember what we have, but we may | 275 // We were able to extract a scheme. Remember what we have, but we may |
| 274 // decide to change our minds later. | 276 // decide to change our minds later. |
| 275 scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len)); | 277 scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len)); |
| 276 | 278 |
| 277 if (parts->scheme.is_valid() && | 279 if (parts->scheme.is_valid() && |
| 278 // Valid schemes are ASCII-only. | 280 // Valid schemes are ASCII-only. |
| 279 (!IsStringASCII(scheme) || | 281 (!IsStringASCII(scheme) || |
| 280 // We need to fix up the segmentation for "www.example.com:/". For this | 282 // We need to fix up the segmentation for "www.example.com:/". For this |
| 281 // case, we guess that schemes with a "." are not actually schemes. | 283 // case, we guess that schemes with a "." are not actually schemes. |
| (...skipping 13 matching lines...) Expand all Loading... |
| 295 } else { | 297 } else { |
| 296 // Having been unable to extract a scheme, we default to HTTP. | 298 // Having been unable to extract a scheme, we default to HTTP. |
| 297 scheme.assign(L"http"); | 299 scheme.assign(L"http"); |
| 298 scheme_end = 0; | 300 scheme_end = 0; |
| 299 } | 301 } |
| 300 | 302 |
| 301 // Cannonicalize the scheme. | 303 // Canonicalize the scheme. |
| 302 StringToLowerASCII(&scheme); | 304 StringToLowerASCII(&scheme); |
| 303 | 305 |
| 304 // Not segmenting file schemes or nonstandard schemes. | 306 // Not segmenting file schemes or nonstandard schemes. |
| 307 string scheme_utf8 = WideToUTF8(scheme); |
| 305 if ((scheme == L"file") || | 308 if ((scheme == L"file") || |
| 306 !url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()), | 309 !url_util::IsStandard(scheme_utf8.c_str(), |
| 307 url_parse::Component(0, static_cast<int>(scheme.length())))) | 310 static_cast<int>(scheme_utf8.length()), |
| 311 url_parse::Component(0, static_cast<int>(scheme_utf8.length())))) |
| 308 return scheme; | 312 return scheme; |
| 309 | 313 |
| 310 if (parts->scheme.is_valid()) { | 314 if (parts->scheme.is_valid()) { |
| 311 // Have the GURL parser do the heavy lifting for us. | 315 // Have the GURL parser do the heavy lifting for us. |
| 312 url_parse::ParseStandardURL(text.data(), static_cast<int>(text.length()), | 316 string text_utf8 = WideToUTF8(text); |
| 317 url_parse::ParseStandardURL(text_utf8.c_str(), |
| 318 static_cast<int>(text_utf8.length()), |
| 313 parts); | 319 parts); |
| 314 return scheme; | 320 return scheme; |
| 315 } | 321 } |
| 316 | 322 |
| 317 // We need to add a scheme in order for ParseStandardURL to be happy. | 323 // We need to add a scheme in order for ParseStandardURL to be happy. |
| 318 // Find the first non-whitespace character. | 324 // Find the first non-whitespace character. |
| 319 wstring::const_iterator first_nonwhite = text.begin(); | 325 wstring::const_iterator first_nonwhite = text.begin(); |
| 320 while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite)) | 326 while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite)) |
| 321 ++first_nonwhite; | 327 ++first_nonwhite; |
| 322 | 328 |
| 323 // Construct the text to parse by inserting the scheme. | 329 // Construct the text to parse by inserting the scheme. |
| 324 wstring inserted_text(scheme); | 330 wstring inserted_text(scheme); |
| 325 inserted_text.append(L"://"); | 331 inserted_text.append(L"://"); |
| 326 wstring text_to_parse(text.begin(), first_nonwhite); | 332 wstring text_to_parse(text.begin(), first_nonwhite); |
| 327 text_to_parse.append(inserted_text); | 333 text_to_parse.append(inserted_text); |
| 328 text_to_parse.append(first_nonwhite, text.end()); | 334 text_to_parse.append(first_nonwhite, text.end()); |
| 329 | 335 |
| 330 // Have the GURL parser do the heavy lifting for us. | 336 // Have the GURL parser do the heavy lifting for us. |
| 331 url_parse::ParseStandardURL(text_to_parse.data(), | 337 string text_to_parse_utf8 = WideToUTF8(text_to_parse); |
| 332 static_cast<int>(text_to_parse.length()), | 338 url_parse::ParseStandardURL(text_to_parse_utf8.c_str(), |
| 339 static_cast<int>(text_to_parse_utf8.length()), |
| 333 parts); | 340 parts); |
| 334 | 341 |
| 335 // Offset the results of the parse to match the original text. | 342 // Offset the results of the parse to match the original text. |
| 336 const int offset = -static_cast<int>(inserted_text.length()); | 343 const int offset = -static_cast<int>(inserted_text.length()); |
| 337 OffsetComponent(offset, &parts->scheme); | 344 OffsetComponent(offset, &parts->scheme); |
| 338 OffsetComponent(offset, &parts->username); | 345 OffsetComponent(offset, &parts->username); |
| 339 OffsetComponent(offset, &parts->password); | 346 OffsetComponent(offset, &parts->password); |
| 340 OffsetComponent(offset, &parts->host); | 347 OffsetComponent(offset, &parts->host); |
| 341 OffsetComponent(offset, &parts->port); | 348 OffsetComponent(offset, &parts->port); |
| 342 OffsetComponent(offset, &parts->path); | 349 OffsetComponent(offset, &parts->path); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 355 | 362 |
| 356 // Segment the URL. | 363 // Segment the URL. |
| 357 url_parse::Parsed parts; | 364 url_parse::Parsed parts; |
| 358 wstring scheme(SegmentURL(trimmed, &parts)); | 365 wstring scheme(SegmentURL(trimmed, &parts)); |
| 359 | 366 |
| 360 // We handle the file scheme separately. | 367 // We handle the file scheme separately. |
| 361 if (scheme == L"file") | 368 if (scheme == L"file") |
| 362 return (parts.scheme.is_valid() ? text : FixupPath(text)); | 369 return (parts.scheme.is_valid() ? text : FixupPath(text)); |
| 363 | 370 |
| 364 // For some schemes whose layouts we understand, we rebuild it. | 371 // For some schemes whose layouts we understand, we rebuild it. |
| 365 if (url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()), | 372 if (url_util::IsStandard( |
| 366 url_parse::Component(0, static_cast<int>(scheme.length())))) { | 373 WideToUTF8(scheme).c_str(), static_cast<int>(scheme.length()), |
| 374 url_parse::Component(0, static_cast<int>(scheme.length())))) { |
| 367 wstring url(scheme); | 375 wstring url(scheme); |
| 368 url.append(L"://"); | 376 url.append(L"://"); |
| 369 | 377 |
| 370 // We need to check whether the |username| is valid because it is our | 378 // We need to check whether the |username| is valid because it is our |
| 371 // responsibility to append the '@' to delineate the user information from | 379 // responsibility to append the '@' to delineate the user information from |
| 372 // the host portion of the URL. | 380 // the host portion of the URL. |
| 373 if (parts.username.is_valid()) { | 381 if (parts.username.is_valid()) { |
| 374 FixupUsername(trimmed, parts.username, &url); | 382 FixupUsername(trimmed, parts.username, &url); |
| 375 FixupPassword(trimmed, parts.password, &url); | 383 FixupPassword(trimmed, parts.password, &url); |
| 376 url.append(L"@"); | 384 url.append(L"@"); |
| (...skipping 18 matching lines...) Expand all Loading... |
| 395 return trimmed; | 403 return trimmed; |
| 396 } | 404 } |
| 397 | 405 |
| 398 // The rules are different here than for regular fixup, since we need to handle | 406 // The rules are different here than for regular fixup, since we need to handle |
| 399 // input like "hello.html" and know to look in the current directory. Regular | 407 // input like "hello.html" and know to look in the current directory. Regular |
| 400 // fixup will look for cues that it is actually a file path before trying to | 408 // fixup will look for cues that it is actually a file path before trying to |
| 401 // figure out what file it is. If our logic doesn't work, we will fall back on | 409 // figure out what file it is. If our logic doesn't work, we will fall back on |
| 402 // regular fixup. | 410 // regular fixup. |
| 403 wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir, | 411 wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir, |
| 404 const wstring& text) { | 412 const wstring& text) { |
| 405 wchar_t old_cur_directory[MAX_PATH]; | 413 wstring old_cur_directory; |
| 406 if (!base_dir.empty()) { | 414 if (!base_dir.empty()) { |
| 407 // save the old current directory before we move to the new one | 415 // save the old current directory before we move to the new one |
| 408 // TODO: in the future, we may want to handle paths longer than MAX_PATH | 416 file_util::GetCurrentDirectory(&old_cur_directory); |
| 409 GetCurrentDirectory(MAX_PATH, old_cur_directory); | 417 file_util::SetCurrentDirectory(base_dir); |
| 410 SetCurrentDirectory(base_dir.c_str()); | |
| 411 } | 418 } |
| 412 | 419 |
| 413 // allow funny input with extra whitespace and the wrong kind of slashes | 420 // allow funny input with extra whitespace and the wrong kind of slashes |
| 414 wstring trimmed; | 421 wstring trimmed; |
| 415 PrepareStringForFileOps(text, &trimmed); | 422 PrepareStringForFileOps(text, &trimmed); |
| 416 | 423 |
| 417 bool is_file = true; | 424 bool is_file = true; |
| 418 wstring full_path; | 425 wstring full_path; |
| 419 if (!ValidPathForFile(trimmed, &full_path)) { | 426 if (!ValidPathForFile(trimmed, &full_path)) { |
| 420 // Not a path as entered, try unescaping it in case the user has | 427 // Not a path as entered, try unescaping it in case the user has |
| 421 // escaped things. We need to go through 8-bit since the escaped values | 428 // escaped things. We need to go through 8-bit since the escaped values |
| 422 // only represent 8-bit values. | 429 // only represent 8-bit values. |
| 423 std::wstring unescaped = UTF8ToWide(UnescapeURLComponent( | 430 std::wstring unescaped = UTF8ToWide(UnescapeURLComponent( |
| 424 WideToUTF8(trimmed), | 431 WideToUTF8(trimmed), |
| 425 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)); | 432 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)); |
| 426 if (!ValidPathForFile(unescaped, &full_path)) | 433 if (!ValidPathForFile(unescaped, &full_path)) |
| 427 is_file = false; | 434 is_file = false; |
| 428 } | 435 } |
| 429 | 436 |
| 430 // Put back the current directory if we saved it. | 437 // Put back the current directory if we saved it. |
| 431 if (!base_dir.empty()) | 438 if (!base_dir.empty()) |
| 432 SetCurrentDirectory(old_cur_directory); | 439 file_util::SetCurrentDirectory(old_cur_directory); |
| 433 | 440 |
| 434 if (is_file) { | 441 if (is_file) { |
| 435 GURL file_url = net::FilePathToFileURL(full_path); | 442 GURL file_url = net::FilePathToFileURL(full_path); |
| 436 if (file_url.is_valid()) | 443 if (file_url.is_valid()) |
| 437 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring()); | 444 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring()); |
| 438 // Invalid files fall through to regular processing. | 445 // Invalid files fall through to regular processing. |
| 439 } | 446 } |
| 440 | 447 |
| 441 // Fall back on regular fixup for this input. | 448 // Fall back on regular fixup for this input. |
| 442 return FixupURL(text, L""); | 449 return FixupURL(text, L""); |
| 443 } | 450 } |
| 444 | 451 |
| OLD | NEW |