Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(668)

Side by Side Diff: chrome/browser/url_fixer_upper.cc

Issue 18305: Move url_* to net subdir (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 11 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/browser/url_fixer_upper.h ('k') | chrome/browser/url_fixer_upper_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <windows.h>
7
8 #include "chrome/browser/url_fixer_upper.h"
9
10 #include "base/file_util.h"
11 #include "base/logging.h"
12 #include "base/string_util.h"
13 #include "chrome/common/gfx/text_elider.h"
14 #include "googleurl/src/gurl.h"
15 #include "googleurl/src/url_canon.h"
16 #include "googleurl/src/url_file.h"
17 #include "googleurl/src/url_parse.h"
18 #include "googleurl/src/url_util.h"
19 #include "net/base/escape.h"
20 #include "net/base/net_util.h"
21 #include "net/base/registry_controlled_domain.h"
22
23 using namespace std;
24
25 // does some basic fixes for input that we want to test for file-ness
26 static void PrepareStringForFileOps(const wstring& text, wstring* output) {
27 TrimWhitespace(text, TRIM_ALL, output);
28 replace(output->begin(), output->end(), '/', '\\');
29 }
30
31 // Tries to create a full path from |text|. If the result is valid and the
32 // file exists, returns true and sets |full_path| to the result. Otherwise,
33 // returns false and leaves |full_path| unchanged.
34 static bool ValidPathForFile(const wstring& text, wstring* full_path) {
35 wchar_t file_path[MAX_PATH];
36 if (!_wfullpath(file_path, text.c_str(), MAX_PATH))
37 return false;
38
39 if (!file_util::PathExists(file_path))
40 return false;
41
42 full_path->assign(file_path);
43 return true;
44 }
45
46 // Tries to create a file: URL from |text| if it looks like a filename, even if
47 // it doesn't resolve as a valid path or to an existing file. Returns true
48 // with a (possibly invalid) file: URL in |fixed_up_url| for input beginning
49 // with a drive specifier or "\\". Returns false in other cases (including
50 // file: URLs: these don't look like filenames), leaving fixed_up_url
51 // unchanged.
52 static wstring FixupPath(const wstring& text) {
53 DCHECK(text.length() >= 2);
54
55 wstring filename;
56 PrepareStringForFileOps(text, &filename);
57
58 if (filename[1] == '|')
59 filename[1] = ':';
60
61 // Here, we know the input looks like a file.
62 GURL file_url = net::FilePathToFileURL(filename);
63 if (file_url.is_valid())
64 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring());
65
66 // Invalid file URL, just return the input.
67 return text;
68 }
69
70 // Checks |domain| to see if a valid TLD is already present. If not, appends
71 // |desired_tld| to the domain, and prepends "www." unless it's already present.
72 // Then modifies |fixed_up_url| to reflect the changes.
73 static void AddDesiredTLD(const wstring& desired_tld,
74 wstring* domain) {
75 if (desired_tld.empty() || domain->empty())
76 return;
77
78 // Check the TLD. If the return value is positive, we already have a TLD, so
79 // abort; if the return value is wstring::npos, there's no valid host (e.g. if
80 // the user pasted in garbage for which HistoryURLProvider is trying to
81 // suggest an exact match), so adding a TLD makes no sense. The only useful
82 // case is where the return value is 0 (there's a valid host with no known
83 // TLD). We disallow unknown registries here so users can input "mail.yahoo"
84 // and hit ctrl-enter to get "www.mail.yahoo.com".
85 const size_t registry_length =
86 net::RegistryControlledDomainService::GetRegistryLength(*domain, false);
87 if (registry_length != 0)
88 return;
89
90 // Add the suffix at the end of the domain.
91 const size_t domain_length(domain->length());
92 DCHECK(domain_length > 0);
93 DCHECK(desired_tld[0] != '.');
94 if ((*domain)[domain_length - 1] != '.')
95 domain->push_back('.');
96 domain->append(desired_tld);
97
98 // Now, if the domain begins with "www.", stop.
99 const wstring prefix(L"www.");
100 if (domain->compare(0, prefix.length(), prefix) != 0) {
101 // Otherwise, add www. to the beginning of the URL.
102 domain->insert(0, prefix);
103 }
104 }
105
106 static inline void FixupUsername(const wstring& text,
107 const url_parse::Component& part,
108 wstring* url) {
109 if (!part.is_valid())
110 return;
111
112 // We don't fix up the username at the moment.
113 url->append(text, part.begin, part.len);
114 // Do not append the trailing '@' because we might need to include the user's
115 // password. FixupURL itself will append the '@' for us.
116 }
117
118 static inline void FixupPassword(const wstring& text,
119 const url_parse::Component& part,
120 wstring* url) {
121 if (!part.is_valid())
122 return;
123
124 // We don't fix up the password at the moment.
125 url->append(L":");
126 url->append(text, part.begin, part.len);
127 }
128
129 static void FixupHost(const wstring& text,
130 const url_parse::Component& part,
131 bool has_scheme,
132 const wstring& desired_tld,
133 wstring* url) {
134 if (!part.is_valid())
135 return;
136
137 // Make domain valid.
138 // Strip all leading dots and all but one trailing dot, unless the user only
139 // typed dots, in which case their input is totally invalid and we should just
140 // leave it unchanged.
141 wstring domain(text, part.begin, part.len);
142 const size_t first_nondot(domain.find_first_not_of('.'));
143 if (first_nondot != wstring::npos) {
144 domain.erase(0, first_nondot);
145 size_t last_nondot(domain.find_last_not_of('.'));
146 DCHECK(last_nondot != wstring::npos);
147 last_nondot += 2; // Point at second period in ending string
148 if (last_nondot < domain.length())
149 domain.erase(last_nondot);
150 }
151
152 // Add any user-specified TLD, if applicable.
153 AddDesiredTLD(desired_tld, &domain);
154
155 url->append(domain);
156 }
157
158 // Looks for a port number, including initial colon, at port_start. If
159 // something invalid (which cannot be fixed up) is found, like ":foo" or
160 // ":7:7", returns false. Otherwise, removes any extra colons
161 // ("::1337" -> ":1337", ":/" -> "/") and returns true.
162 static void FixupPort(const wstring& text,
163 const url_parse::Component& part,
164 wstring* url) {
165 if (!part.is_valid())
166 return;
167
168 // Look for non-digit in port and strip if found.
169 wstring port(text, part.begin, part.len);
170 for (wstring::iterator i = port.begin(); i != port.end(); ) {
171 if (IsAsciiDigit(*i))
172 ++i;
173 else
174 i = port.erase(i);
175 }
176
177 if (port.empty())
178 return; // Nothing to append.
179
180 url->append(L":");
181 url->append(port);
182 }
183
184 static inline void FixupPath(const wstring& text,
185 const url_parse::Component& part,
186 wstring* url) {
187 if (!part.is_valid() || part.len == 0) {
188 // We should always have a path.
189 url->append(L"/");
190 return;
191 }
192
193 // Append the path as is.
194 url->append(text, part.begin, part.len);
195 }
196
197 static inline void FixupQuery(const wstring& text,
198 const url_parse::Component& part,
199 wstring* url) {
200 if (!part.is_valid())
201 return;
202
203 // We don't fix up the query at the moment.
204 url->append(L"?");
205 url->append(text, part.begin, part.len);
206 }
207
208 static inline void FixupRef(const wstring& text,
209 const url_parse::Component& part,
210 wstring* url) {
211 if (!part.is_valid())
212 return;
213
214 // We don't fix up the ref at the moment.
215 url->append(L"#");
216 url->append(text, part.begin, part.len);
217 }
218
219 static void OffsetComponent(int offset, url_parse::Component* part) {
220 DCHECK(part);
221
222 if (part->is_valid()) {
223 // Offset the location of this component.
224 part->begin += offset;
225
226 // This part might not have existed in the original text.
227 if (part->begin < 0)
228 part->reset();
229 }
230 }
231
232 static bool HasPort(const std::wstring& original_text,
233 const url_parse::Component& scheme_component,
234 const std::wstring& scheme) {
235 // Find the range between the ":" and the "/".
236 size_t port_start = scheme_component.end() + 1;
237 size_t port_end = port_start;
238 while ((port_end < original_text.length()) &&
239 !url_parse::IsAuthorityTerminator(original_text[port_end]))
240 ++port_end;
241 if (port_end == port_start)
242 return false;
243
244 // Scan the range to see if it is entirely digits.
245 for (size_t i = port_start; i < port_end; ++i) {
246 if (!IsAsciiDigit(original_text[i]))
247 return false;
248 }
249
250 return true;
251 }
252
253 wstring URLFixerUpper::SegmentURL(const wstring& text,
254 url_parse::Parsed* parts) {
255 // Initialize the result.
256 *parts = url_parse::Parsed();
257
258 wstring trimmed;
259 TrimWhitespace(text, TRIM_ALL, &trimmed);
260 if (trimmed.empty())
261 return wstring(); // Nothing to segment.
262
263 int trimmed_length = static_cast<int>(trimmed.length());
264 if (url_parse::DoesBeginWindowsDriveSpec(trimmed.data(), 0, trimmed_length)
265 || url_parse::DoesBeginUNCPath(trimmed.data(), 0, trimmed_length, false))
266 return L"file";
267
268 // Otherwise, we need to look at things carefully.
269 wstring scheme;
270 if (url_parse::ExtractScheme(text.data(),
271 static_cast<int>(text.length()),
272 &parts->scheme)) {
273 // We were able to extract a scheme. Remember what we have, but we may
274 // decide to change our minds later.
275 scheme.assign(text.substr(parts->scheme.begin, parts->scheme.len));
276
277 if (parts->scheme.is_valid() &&
278 // Valid schemes are ASCII-only.
279 (!IsStringASCII(scheme) ||
280 // We need to fix up the segmentation for "www.example.com:/". For this
281 // case, we guess that schemes with a "." are not actually schemes.
282 (scheme.find(L".") != wstring::npos) ||
283 // We need to fix up the segmentation for "www:123/". For this case, we
284 // will add an HTTP scheme later and make the URL parser happy.
285 // TODO(pkasting): Maybe we should try to use GURL's parser for this?
286 HasPort(text, parts->scheme, scheme)))
287 parts->scheme.reset();
288 }
289
290 // When we couldn't find a scheme in the input, we need to pick one. Normally
291 // we choose http, but if the URL starts with "ftp.", we match other browsers
292 // and choose ftp.
293 if (!parts->scheme.is_valid())
294 scheme.assign(StartsWith(text, L"ftp.", false) ? L"ftp" : L"http");
295
296 // Cannonicalize the scheme.
297 StringToLowerASCII(&scheme);
298
299 // Not segmenting file schemes or nonstandard schemes.
300 if ((scheme == L"file") ||
301 !url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()),
302 url_parse::Component(0, static_cast<int>(scheme.length()))))
303 return scheme;
304
305 if (parts->scheme.is_valid()) {
306 // Have the GURL parser do the heavy lifting for us.
307 url_parse::ParseStandardURL(text.data(), static_cast<int>(text.length()),
308 parts);
309 return scheme;
310 }
311
312 // We need to add a scheme in order for ParseStandardURL to be happy.
313 // Find the first non-whitespace character.
314 wstring::const_iterator first_nonwhite = text.begin();
315 while ((first_nonwhite != text.end()) && IsWhitespace(*first_nonwhite))
316 ++first_nonwhite;
317
318 // Construct the text to parse by inserting the scheme.
319 wstring inserted_text(scheme);
320 inserted_text.append(L"://");
321 wstring text_to_parse(text.begin(), first_nonwhite);
322 text_to_parse.append(inserted_text);
323 text_to_parse.append(first_nonwhite, text.end());
324
325 // Have the GURL parser do the heavy lifting for us.
326 url_parse::ParseStandardURL(text_to_parse.data(),
327 static_cast<int>(text_to_parse.length()),
328 parts);
329
330 // Offset the results of the parse to match the original text.
331 const int offset = -static_cast<int>(inserted_text.length());
332 OffsetComponent(offset, &parts->scheme);
333 OffsetComponent(offset, &parts->username);
334 OffsetComponent(offset, &parts->password);
335 OffsetComponent(offset, &parts->host);
336 OffsetComponent(offset, &parts->port);
337 OffsetComponent(offset, &parts->path);
338 OffsetComponent(offset, &parts->query);
339 OffsetComponent(offset, &parts->ref);
340
341 return scheme;
342 }
343
344 std::wstring URLFixerUpper::FixupURL(const wstring& text,
345 const wstring& desired_tld) {
346 wstring trimmed;
347 TrimWhitespace(text, TRIM_ALL, &trimmed);
348 if (trimmed.empty())
349 return wstring(); // Nothing here.
350
351 // Segment the URL.
352 url_parse::Parsed parts;
353 wstring scheme(SegmentURL(trimmed, &parts));
354
355 // We handle the file scheme separately.
356 if (scheme == L"file")
357 return (parts.scheme.is_valid() ? text : FixupPath(text));
358
359 // For some schemes whose layouts we understand, we rebuild it.
360 if (url_util::IsStandard(scheme.c_str(), static_cast<int>(scheme.length()),
361 url_parse::Component(0, static_cast<int>(scheme.length())))) {
362 wstring url(scheme);
363 url.append(L"://");
364
365 // We need to check whether the |username| is valid because it is our
366 // responsibility to append the '@' to delineate the user information from
367 // the host portion of the URL.
368 if (parts.username.is_valid()) {
369 FixupUsername(trimmed, parts.username, &url);
370 FixupPassword(trimmed, parts.password, &url);
371 url.append(L"@");
372 }
373
374 FixupHost(trimmed, parts.host, parts.scheme.is_valid(), desired_tld, &url);
375 FixupPort(trimmed, parts.port, &url);
376 FixupPath(trimmed, parts.path, &url);
377 FixupQuery(trimmed, parts.query, &url);
378 FixupRef(trimmed, parts.ref, &url);
379
380 return url;
381 }
382
383 // In the worst-case, we insert a scheme if the URL lacks one.
384 if (!parts.scheme.is_valid()) {
385 wstring fixed_scheme(scheme);
386 fixed_scheme.append(L"://");
387 trimmed.insert(0, fixed_scheme);
388 }
389
390 return trimmed;
391 }
392
393 // The rules are different here than for regular fixup, since we need to handle
394 // input like "hello.html" and know to look in the current directory. Regular
395 // fixup will look for cues that it is actually a file path before trying to
396 // figure out what file it is. If our logic doesn't work, we will fall back on
397 // regular fixup.
398 wstring URLFixerUpper::FixupRelativeFile(const wstring& base_dir,
399 const wstring& text) {
400 wchar_t old_cur_directory[MAX_PATH];
401 if (!base_dir.empty()) {
402 // save the old current directory before we move to the new one
403 // TODO: in the future, we may want to handle paths longer than MAX_PATH
404 GetCurrentDirectory(MAX_PATH, old_cur_directory);
405 SetCurrentDirectory(base_dir.c_str());
406 }
407
408 // allow funny input with extra whitespace and the wrong kind of slashes
409 wstring trimmed;
410 PrepareStringForFileOps(text, &trimmed);
411
412 bool is_file = true;
413 wstring full_path;
414 if (!ValidPathForFile(trimmed, &full_path)) {
415 // Not a path as entered, try unescaping it in case the user has
416 // escaped things. We need to go through 8-bit since the escaped values
417 // only represent 8-bit values.
418 std::wstring unescaped = UTF8ToWide(UnescapeURLComponent(
419 WideToUTF8(trimmed),
420 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS));
421 if (!ValidPathForFile(unescaped, &full_path))
422 is_file = false;
423 }
424
425 // Put back the current directory if we saved it.
426 if (!base_dir.empty())
427 SetCurrentDirectory(old_cur_directory);
428
429 if (is_file) {
430 GURL file_url = net::FilePathToFileURL(full_path);
431 if (file_url.is_valid())
432 return gfx::ElideUrl(file_url, ChromeFont(), 0, std::wstring());
433 // Invalid files fall through to regular processing.
434 }
435
436 // Fall back on regular fixup for this input.
437 return FixupURL(text, L"");
438 }
439
OLDNEW
« no previous file with comments | « chrome/browser/url_fixer_upper.h ('k') | chrome/browser/url_fixer_upper_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698