| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <algorithm> | 5 #include <algorithm> |
| 6 #include <windows.h> |
| 6 | 7 |
| 7 #include "chrome/browser/url_fixer_upper.h" | 8 #include "chrome/browser/url_fixer_upper.h" |
| 8 | 9 |
| 9 #include "base/file_util.h" | 10 #include "base/file_util.h" |
| 10 #include "base/logging.h" | 11 #include "base/logging.h" |
| 11 #include "base/string_util.h" | 12 #include "base/string_util.h" |
| 12 #include "chrome/common/gfx/text_elider.h" | 13 #include "chrome/common/gfx/text_elider.h" |
| 13 #include "googleurl/src/gurl.h" | 14 #include "googleurl/src/gurl.h" |
| 14 #include "googleurl/src/url_canon.h" | 15 #include "googleurl/src/url_canon.h" |
| 15 #include "googleurl/src/url_file.h" | 16 #include "googleurl/src/url_file.h" |
| 16 #include "googleurl/src/url_parse.h" | 17 #include "googleurl/src/url_parse.h" |
| 17 #include "googleurl/src/url_util.h" | 18 #include "googleurl/src/url_util.h" |
| 18 #include "net/base/escape.h" | 19 #include "net/base/escape.h" |
| 19 #include "net/base/net_util.h" | 20 #include "net/base/net_util.h" |
| 20 #include "net/base/registry_controlled_domain.h" | 21 #include "net/base/registry_controlled_domain.h" |
| 21 | 22 |
| 22 using namespace std; | 23 using namespace std; |
| 23 | 24 |
| 24 // does some basic fixes for input that we want to test for file-ness | 25 // does some basic fixes for input that we want to test for file-ness |
| 25 static void PrepareStringForFileOps(const wstring& text, wstring* output) { | 26 static void PrepareStringForFileOps(const wstring& text, wstring* output) { |
| 26 TrimWhitespace(text, TRIM_ALL, output); | 27 TrimWhitespace(text, TRIM_ALL, output); |
| 27 replace(output->begin(), output->end(), '/', '\\'); | 28 replace(output->begin(), output->end(), '/', '\\'); |
| 28 } | 29 } |
| 29 | 30 |
| 30 // Tries to create a full path from |text|. If the result is valid and the | 31 // Tries to create a full path from |text|. If the result is valid and the |
| 31 // file exists, returns true and sets |full_path| to the result. Otherwise, | 32 // file exists, returns true and sets |full_path| to the result. Otherwise, |
| 32 // returns false and leaves |full_path| unchanged. | 33 // returns false and leaves |full_path| unchanged. |
| 33 static bool ValidPathForFile(const wstring& text, wstring* full_path) { | 34 static bool ValidPathForFile(const wstring& text, wstring* full_path) { |
| 34 wstring file_path(text); | 35 wchar_t file_path[MAX_PATH]; |
| 35 if (!file_util::AbsolutePath(&file_path)) | 36 if (!_wfullpath(file_path, text.c_str(), MAX_PATH)) |
| 36 return false; | 37 return false; |
| 37 | 38 |
| 38 if (!file_util::PathExists(file_path)) | 39 if (!file_util::PathExists(file_path)) |
| 39 return false; | 40 return false; |
| 40 | 41 |
| 41 full_path->assign(file_path); | 42 full_path->assign(file_path); |
| 42 return true; | 43 return true; |
| 43 } | 44 } |
| 44 | 45 |
| 45 // Tries to create a file: URL from |text| if it looks like a filename, even if | 46 // Tries to create a file: URL from |text| if it looks like a filename, even if |
| (...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 247 } | 248 } |
| 248 | 249 |
| 249 return true; | 250 return true; |
| 250 } | 251 } |
| 251 | 252 |
| 252 wstring URLFixerUpper::SegmentURL(const wstring& text, | 253 wstring URLFixerUpper::SegmentURL(const wstring& text, |
| 253 url_parse::Parsed* parts) { | 254 url_parse::Parsed* parts) { |
| 254 // Initialize the result. | 255 // Initialize the result. |
| 255 *parts = url_parse::Parsed(); | 256 *parts = url_parse::Parsed(); |
| 256 | 257 |
| 257 #if defined(OS_WIN) | |
| 258 wstring trimmed; | 258 wstring trimmed; |
| 259 TrimWhitespace(text, TRIM_ALL, &trimmed); | 259 TrimWhitespace(text, TRIM_ALL, &trimmed); |
| 260 if (trimmed.empty()) | 260 if (trimmed.empty()) |
| 261 return wstring(); // Nothing to segment. | 261 return wstring(); // Nothing to segment. |
| 262 | 262 |
| 263 int trimmed_length = static_cast<int>(trimmed.length()); | 263 int trimmed_length = static_cast<int>(trimmed.length()); |
| 264 if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) | 264 if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length) |
| 265 || url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false)) | 265 || url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false)) |
| 266 return L"file"; | 266 return L"file"; |
| 267 #endif | |
| 268 | 267 |
| 269 // Otherwise, we need to look at things carefully. | 268 // Otherwise, we need to look at things carefully. |
| 270 wstring scheme; | 269 wstring scheme; |
| 271 string text_utf8 = WideToUTF8(text); | 270 if (url_parse::ExtractScheme(text.data(), |
| 272 if (url_parse::ExtractScheme(text_utf8.c_str(), | 271 static_cast<int>(text.length()), |
| 273 static_cast<int>(text_utf8.length()), | |
| 274 &parts->scheme)) { | 272 &parts->scheme)) { |
| 275 // We were able to extract a scheme. Remember what we have, but we may | 273 // We were able to extract a scheme. Remember what we have, but we may |
| 276 // decide to change our minds later. | 274 // decide to change our minds later. |
| 277 scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len)); | 275 scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len)); |
| 278 | 276 |
| 279 if (parts->scheme.is_valid() && | 277 if (parts->scheme.is_valid() && |
| 280 // Valid schemes are ASCII-only. | 278 // Valid schemes are ASCII-only. |
| 281 (!IsStringASCII(scheme) || | 279 (!IsStringASCII(scheme) || |
| 282 // We need to fix up the segmentation for "www.example.com:/". For this | 280 // We need to fix up the segmentation for "www.example.com:/". For this |
| 283 // case, we guess that schemes with a "." are not actually schemes. | 281 // case, we guess that schemes with a "." are not actually schemes. |
| (...skipping 13 matching lines...) Expand all Loading... |
| 297 } else { | 295 } else { |
| 298 // Having been unable to extract a scheme, we default to HTTP. | 296 // Having been unable to extract a scheme, we default to HTTP. |
| 299 scheme.assign(L"http"); | 297 scheme.assign(L"http"); |
| 300 scheme_end = 0; | 298 scheme_end = 0; |
| 301 } | 299 } |
| 302 | 300 |
| 303 // Cannonicalize the scheme. | 301 // Cannonicalize the scheme. |
| 304 StringToLowerASCII(&scheme); | 302 StringToLowerASCII(&scheme); |
| 305 | 303 |
| 306 // Not segmenting file schemes or nonstandard schemes. | 304 // Not segmenting file schemes or nonstandard schemes. |
| 307 string scheme_utf8 = WideToUTF8(scheme); | |
| 308 if ((scheme == L"file") || | 305 if ((scheme == L"file") || |
| 309 !url_util::IsStandard(scheme_utf8.c_str(), | 306 !url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()), |
| 310 static_cast<int>(scheme_utf8.length()), | 307 url_parse::Component(0, static_cast<int>(scheme.length())))) |
| 311 url_parse::Component(0, static_cast<int>(scheme_utf8.length())))) | |
| 312 return scheme; | 308 return scheme; |
| 313 | 309 |
| 314 if (parts->scheme.is_valid()) { | 310 if (parts->scheme.is_valid()) { |
| 315 // Have the GURL parser do the heavy lifting for us. | 311 // Have the GURL parser do the heavy lifting for us. |
| 316 string text_utf8 = WideToUTF8(text); | 312 url_parse::ParseStandardURL(text.data(), static_cast<int>(text.length()), |
| 317 url_parse::ParseStandardURL(text_utf8.c_str(), | |
| 318 static_cast<int>(text_utf8.length()), | |
| 319 parts); | 313 parts); |
| 320 return scheme; | 314 return scheme; |
| 321 } | 315 } |
| 322 | 316 |
| 323 // We need to add a scheme in order for ParseStandardURL to be happy. | 317 // We need to add a scheme in order for ParseStandardURL to be happy. |
| 324 // Find the first non-whitespace character. | 318 // Find the first non-whitespace character. |
| 325 wstring::const_iterator first_nonwhite = text.begin(); | 319 wstring::const_iterator first_nonwhite = text.begin(); |
| 326 while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite)) | 320 while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite)) |
| 327 ++first_nonwhite; | 321 ++first_nonwhite; |
| 328 | 322 |
| 329 // Construct the text to parse by inserting the scheme. | 323 // Construct the text to parse by inserting the scheme. |
| 330 wstring inserted_text(scheme); | 324 wstring inserted_text(scheme); |
| 331 inserted_text.append(L"://"); | 325 inserted_text.append(L"://"); |
| 332 wstring text_to_parse(text.begin(), first_nonwhite); | 326 wstring text_to_parse(text.begin(), first_nonwhite); |
| 333 text_to_parse.append(inserted_text); | 327 text_to_parse.append(inserted_text); |
| 334 text_to_parse.append(first_nonwhite, text.end()); | 328 text_to_parse.append(first_nonwhite, text.end()); |
| 335 | 329 |
| 336 // Have the GURL parser do the heavy lifting for us. | 330 // Have the GURL parser do the heavy lifting for us. |
| 337 string text_to_parse_utf8 = WideToUTF8(text_to_parse); | 331 url_parse::ParseStandardURL(text_to_parse.data(), |
| 338 url_parse::ParseStandardURL(text_to_parse_utf8.c_str(), | 332 static_cast<int>(text_to_parse.length()), |
| 339 static_cast<int>(text_to_parse_utf8.length()), | |
| 340 parts); | 333 parts); |
| 341 | 334 |
| 342 // Offset the results of the parse to match the original text. | 335 // Offset the results of the parse to match the original text. |
| 343 const int offset = -static_cast<int>(inserted_text.length()); | 336 const int offset = -static_cast<int>(inserted_text.length()); |
| 344 OffsetComponent(offset, &parts->scheme); | 337 OffsetComponent(offset, &parts->scheme); |
| 345 OffsetComponent(offset, &parts->username); | 338 OffsetComponent(offset, &parts->username); |
| 346 OffsetComponent(offset, &parts->password); | 339 OffsetComponent(offset, &parts->password); |
| 347 OffsetComponent(offset, &parts->host); | 340 OffsetComponent(offset, &parts->host); |
| 348 OffsetComponent(offset, &parts->port); | 341 OffsetComponent(offset, &parts->port); |
| 349 OffsetComponent(offset, &parts->path); | 342 OffsetComponent(offset, &parts->path); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 362 | 355 |
| 363 // Segment the URL. | 356 // Segment the URL. |
| 364 url_parse::Parsed parts; | 357 url_parse::Parsed parts; |
| 365 wstring scheme(SegmentURL(trimmed, &parts)); | 358 wstring scheme(SegmentURL(trimmed, &parts)); |
| 366 | 359 |
| 367 // We handle the file scheme separately. | 360 // We handle the file scheme separately. |
| 368 if (scheme == L"file") | 361 if (scheme == L"file") |
| 369 return (parts.scheme.is_valid() ? text : FixupPath(text)); | 362 return (parts.scheme.is_valid() ? text : FixupPath(text)); |
| 370 | 363 |
| 371 // For some schemes whose layouts we understand, we rebuild it. | 364 // For some schemes whose layouts we understand, we rebuild it. |
| 372 if (url_util::IsStandard( | 365 if (url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()), |
| 373 WideToUTF8(scheme).c_str(), static_cast<int>(scheme.length()), | 366 url_parse::Component(0, static_cast<int>(scheme.length())))) { |
| 374 url_parse::Component(0, static_cast<int>(scheme.length())))) { | |
| 375 wstring url(scheme); | 367 wstring url(scheme); |
| 376 url.append(L"://"); | 368 url.append(L"://"); |
| 377 | 369 |
| 378 // We need to check whether the |username| is valid because it is our | 370 // We need to check whether the |username| is valid because it is our |
| 379 // responsibility to append the '@' to delineate the user information from | 371 // responsibility to append the '@' to delineate the user information from |
| 380 // the host portion of the URL. | 372 // the host portion of the URL. |
| 381 if (parts.username.is_valid()) { | 373 if (parts.username.is_valid()) { |
| 382 FixupUsername(trimmed, parts.username, &url); | 374 FixupUsername(trimmed, parts.username, &url); |
| 383 FixupPassword(trimmed, parts.password, &url); | 375 FixupPassword(trimmed, parts.password, &url); |
| 384 url.append(L"@"); | 376 url.append(L"@"); |
| (...skipping 18 matching lines...) Expand all Loading... |
| 403 return trimmed; | 395 return trimmed; |
| 404 } | 396 } |
| 405 | 397 |
| 406 // The rules are different here than for regular fixup, since we need to handle | 398 // The rules are different here than for regular fixup, since we need to handle |
| 407 // input like "hello.html" and know to look in the current directory. Regular | 399 // input like "hello.html" and know to look in the current directory. Regular |
| 408 // fixup will look for cues that it is actually a file path before trying to | 400 // fixup will look for cues that it is actually a file path before trying to |
| 409 // figure out what file it is. If our logic doesn't work, we will fall back on | 401 // figure out what file it is. If our logic doesn't work, we will fall back on |
| 410 // regular fixup. | 402 // regular fixup. |
| 411 wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir, | 403 wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir, |
| 412 const wstring& text) { | 404 const wstring& text) { |
| 413 wstring old_cur_directory; | 405 wchar_t old_cur_directory[MAX_PATH]; |
| 414 if (!base_dir.empty()) { | 406 if (!base_dir.empty()) { |
| 415 // save the old current directory before we move to the new one | 407 // save the old current directory before we move to the new one |
| 416 file_util::GetCurrentDirectory(&old_cur_directory); | 408 // TODO: in the future, we may want to handle paths longer than MAX_PATH |
| 417 file_util::SetCurrentDirectory(base_dir); | 409 GetCurrentDirectory(MAX_PATH, old_cur_directory); |
| 410 SetCurrentDirectory(base_dir.c_str()); |
| 418 } | 411 } |
| 419 | 412 |
| 420 // allow funny input with extra whitespace and the wrong kind of slashes | 413 // allow funny input with extra whitespace and the wrong kind of slashes |
| 421 wstring trimmed; | 414 wstring trimmed; |
| 422 PrepareStringForFileOps(text, &trimmed); | 415 PrepareStringForFileOps(text, &trimmed); |
| 423 | 416 |
| 424 bool is_file = true; | 417 bool is_file = true; |
| 425 wstring full_path; | 418 wstring full_path; |
| 426 if (!ValidPathForFile(trimmed, &full_path)) { | 419 if (!ValidPathForFile(trimmed, &full_path)) { |
| 427 // Not a path as entered, try unescaping it in case the user has | 420 // Not a path as entered, try unescaping it in case the user has |
| 428 // escaped things. We need to go through 8-bit since the escaped values | 421 // escaped things. We need to go through 8-bit since the escaped values |
| 429 // only represent 8-bit values. | 422 // only represent 8-bit values. |
| 430 std::wstring unescaped = UTF8ToWide(UnescapeURLComponent( | 423 std::wstring unescaped = UTF8ToWide(UnescapeURLComponent( |
| 431 WideToUTF8(trimmed), | 424 WideToUTF8(trimmed), |
| 432 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)); | 425 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS)); |
| 433 if (!ValidPathForFile(unescaped, &full_path)) | 426 if (!ValidPathForFile(unescaped, &full_path)) |
| 434 is_file = false; | 427 is_file = false; |
| 435 } | 428 } |
| 436 | 429 |
| 437 // Put back the current directory if we saved it. | 430 // Put back the current directory if we saved it. |
| 438 if (!base_dir.empty()) | 431 if (!base_dir.empty()) |
| 439 file_util::SetCurrentDirectory(old_cur_directory); | 432 SetCurrentDirectory(old_cur_directory); |
| 440 | 433 |
| 441 if (is_file) { | 434 if (is_file) { |
| 442 GURL file_url = net::FilePathToFileURL(full_path); | 435 GURL file_url = net::FilePathToFileURL(full_path); |
| 443 if (file_url.is_valid()) | 436 if (file_url.is_valid()) |
| 444 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring()); | 437 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring()); |
| 445 // Invalid files fall through to regular processing. | 438 // Invalid files fall through to regular processing. |
| 446 } | 439 } |
| 447 | 440 |
| 448 // Fall back on regular fixup for this input. | 441 // Fall back on regular fixup for this input. |
| 449 return FixupURL(text, L""); | 442 return FixupURL(text, L""); |
| 450 } | 443 } |
| 451 | 444 |
| OLD | NEW |