OLD | NEW |
(Empty) | |
| 1 // Copyright 2007, Google Inc. |
| 2 // All rights reserved. |
| 3 // |
| 4 // Redistribution and use in source and binary forms, with or without |
| 5 // modification, are permitted provided that the following conditions are |
| 6 // met: |
| 7 // |
| 8 // * Redistributions of source code must retain the above copyright |
| 9 // notice, this list of conditions and the following disclaimer. |
| 10 // * Redistributions in binary form must reproduce the above |
| 11 // copyright notice, this list of conditions and the following disclaimer |
| 12 // in the documentation and/or other materials provided with the |
| 13 // distribution. |
| 14 // * Neither the name of Google Inc. nor the names of its |
| 15 // contributors may be used to endorse or promote products derived from |
| 16 // this software without specific prior written permission. |
| 17 // |
| 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 |
| 30 #include "base/logging.h" |
| 31 #include "googleurl/src/url_file.h" |
| 32 #include "googleurl/src/url_parse.h" |
| 33 #include "googleurl/src/url_parse_internal.h" |
| 34 |
| 35 // Interesting IE file:isms... |
| 36 // |
| 37 // INPUT OUTPUT |
| 38 // ========================= ============================== |
| 39 // file:/foo/bar file:///foo/bar |
| 40 // The result here seems totally invalid!?!? This isn't UNC. |
| 41 // |
| 42 // file:/ |
| 43 // file:// or any other number of slashes |
| 44 // IE6 doesn't do anything at all if you click on this link. No error: |
| 45 // nothing. IE6's history system seems to always color this link, so I'm |
| 46 // guessing that it maps internally to the empty URL. |
| 47 // |
| 48 // C:\ file:///C:/ |
| 49 // When on a file: URL source page, this link will work. When over HTTP, |
| 50 // the file: URL will appear in the status bar but the link will not work |
| 51 // (security restriction for all file URLs). |
| 52 // |
| 53 // file:foo/ file:foo/ (invalid?!?!?) |
| 54 // file:/foo/ file:///foo/ (invalid?!?!?) |
| 55 // file://foo/ file://foo/ (UNC to server "foo") |
| 56 // file:///foo/ file:///foo/ (invalid, seems to be a file) |
| 57 // file:////foo/ file://foo/ (UNC to server "foo") |
| 58 // Any more than four slashes is also treated as UNC. |
| 59 // |
| 60 // file:C:/ file://C:/ |
| 61 // file:/C:/ file://C:/ |
| 62 // The number of slashes after "file:" doesn't matter if the thing following |
| 63 // it looks like an absolute drive path. Also, slashes and backslashes are |
| 64 // equally valid here. |
| 65 |
| 66 namespace url_parse { |
| 67 |
| 68 namespace { |
| 69 |
| 70 // A subcomponent of DoInitFileURL, the input of this function should be a UNC |
| 71 // path name, with the index of the first character after the slashes following |
| 72 // the scheme given in |after_slashes|. This will initialize the host, path, |
| 73 // query, and ref, and leave the other output components untouched |
| 74 // (DoInitFileURL handles these for us). |
| 75 template<typename CHAR> |
| 76 void DoParseUNC(const CHAR* spec, |
| 77 int after_slashes, |
| 78 int spec_len, |
| 79 Parsed* parsed) { |
| 80 int next_slash = FindNextSlash(spec, after_slashes, spec_len); |
| 81 if (next_slash == spec_len) { |
| 82 // No additional slash found, as in "file://foo", treat the text as the |
| 83 // host with no path (this will end up being UNC to server "foo"). |
| 84 int host_len = spec_len - after_slashes; |
| 85 if (host_len) |
| 86 parsed->host = Component(after_slashes, host_len); |
| 87 else |
| 88 parsed->host.reset(); |
| 89 parsed->path.reset(); |
| 90 return; |
| 91 } |
| 92 |
| 93 #ifdef WIN32 |
| 94 // See if we have something that looks like a path following the first |
| 95 // component. As in "file://localhost/c:/", we get "c:/" out. We want to |
| 96 // treat this as a having no host but the path given. Works on Windows only. |
| 97 if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) { |
| 98 parsed->host.reset(); |
| 99 ParsePathInternal(spec, MakeRange(next_slash, spec_len), |
| 100 &parsed->path, &parsed->query, &parsed->ref); |
| 101 return; |
| 102 } |
| 103 #endif |
| 104 |
| 105 // Otherwise, everything up until that first slash we found is the host name, |
| 106 // which will end up being the UNC host. For example "file://foo/bar.txt" |
| 107 // will get a server name of "foo" and a path of "/bar". Later, on Windows, |
| 108 // this should be treated as the filename "\\foo\bar.txt" in proper UNC |
| 109 // notation. |
| 110 int host_len = next_slash - after_slashes; |
| 111 if (host_len) |
| 112 parsed->host = MakeRange(after_slashes, next_slash); |
| 113 else |
| 114 parsed->host.reset(); |
| 115 if (next_slash < spec_len) { |
| 116 ParsePathInternal(spec, MakeRange(next_slash, spec_len), |
| 117 &parsed->path, &parsed->query, &parsed->ref); |
| 118 } else { |
| 119 parsed->path.reset(); |
| 120 } |
| 121 } |
| 122 |
| 123 // A subcomponent of DoParseFileURL, the input should be a local file, with the |
| 124 // beginning of the path indicated by the index in |path_begin|. This will |
| 125 // initialize the host, path, query, and ref, and leave the other output |
| 126 // components untouched (DoInitFileURL handles these for us). |
| 127 template<typename CHAR> |
| 128 void DoParseLocalFile(const CHAR* spec, |
| 129 int path_begin, |
| 130 int spec_len, |
| 131 Parsed* parsed) { |
| 132 parsed->host.reset(); |
| 133 ParsePathInternal(spec, MakeRange(path_begin, spec_len), |
| 134 &parsed->path, &parsed->query, &parsed->ref); |
| 135 } |
| 136 |
| 137 // Backend for the external functions that operates on either char type. |
| 138 // We are handed the character after the "file:" at the beginning of the spec. |
| 139 // Usually this is a slash, but needn't be; we allow paths like "file:c:\foo". |
| 140 template<typename CHAR> |
| 141 void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) { |
| 142 DCHECK(spec_len >= 0); |
| 143 |
| 144 // Get the parts we never use for file URLs out of the way. |
| 145 parsed->username.reset(); |
| 146 parsed->password.reset(); |
| 147 parsed->port.reset(); |
| 148 |
| 149 // Many of the code paths don't set these, so it's convenient to just clear |
| 150 // them. We'll write them in those cases we need them. |
| 151 parsed->query.reset(); |
| 152 parsed->ref.reset(); |
| 153 |
| 154 // Strip leading & trailing spaces and control characters. |
| 155 int begin = 0; |
| 156 TrimURL(spec, &begin, &spec_len); |
| 157 |
| 158 // Find the scheme. |
| 159 int num_slashes; |
| 160 int after_scheme; |
| 161 int after_slashes; |
| 162 #ifdef WIN32 |
| 163 // See how many slashes there are. We want to handle cases like UNC but also |
| 164 // "/c:/foo". This is when there is no scheme, so we can allow pages to do |
| 165 // links like "c:/foo/bar" or "//foo/bar". This is also called by the |
| 166 // relative URL resolver when it determines there is an absolute URL, which |
| 167 // may give us input like "/c:/foo". |
| 168 num_slashes = CountConsecutiveSlashes(spec, begin, spec_len); |
| 169 after_slashes = begin + num_slashes; |
| 170 if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) { |
| 171 // Windows path, don't try to extract the scheme (for example, "c:\foo"). |
| 172 parsed->scheme.reset(); |
| 173 after_scheme = after_slashes; |
| 174 } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) { |
| 175 // Windows UNC path: don't try to extract the scheme, but keep the slashes. |
| 176 parsed->scheme.reset(); |
| 177 after_scheme = begin; |
| 178 } else |
| 179 #endif |
| 180 { |
| 181 if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { |
| 182 // Offset the results since we gave ExtractScheme a substring. |
| 183 parsed->scheme.begin += begin; |
| 184 after_scheme = parsed->scheme.end() + 1; |
| 185 } else { |
| 186 // No scheme found, remember that. |
| 187 parsed->scheme.reset(); |
| 188 after_scheme = begin; |
| 189 } |
| 190 } |
| 191 |
| 192 // Handle empty specs ones that contain only whitespace or control chars, |
| 193 // or that are just the scheme (for example "file:"). |
| 194 if (after_scheme == spec_len) { |
| 195 parsed->host.reset(); |
| 196 parsed->path.reset(); |
| 197 return; |
| 198 } |
| 199 |
| 200 num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); |
| 201 |
| 202 after_slashes = after_scheme + num_slashes; |
| 203 #ifdef WIN32 |
| 204 // Check whether the input is a drive again. We checked above for windows |
| 205 // drive specs, but that's only at the very beginning to see if we have a |
| 206 // scheme at all. This test will be duplicated in that case, but will |
| 207 // additionally handle all cases with a real scheme such as "file:///C:/". |
| 208 if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) && |
| 209 num_slashes != 3) { |
| 210 // Anything not beginning with a drive spec ("c:\") on Windows is treated |
| 211 // as UNC, with the exception of three slashes which always means a file. |
| 212 // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails. |
| 213 DoParseUNC(spec, after_slashes, spec_len, parsed); |
| 214 return; |
| 215 } |
| 216 #else |
| 217 // file: URL with exactly 2 slashes is considered to have a host component. |
| 218 if (num_slashes == 2) { |
| 219 DoParseUNC(spec, after_slashes, spec_len, parsed); |
| 220 return; |
| 221 } |
| 222 #endif // WIN32 |
| 223 |
| 224 // Easy and common case, the full path immediately follows the scheme |
| 225 // (modulo slashes), as in "file://c:/foo". Just treat everything from |
| 226 // there to the end as the path. Empty hosts have 0 length instead of -1. |
| 227 // We include the last slash as part of the path if there is one. |
| 228 DoParseLocalFile(spec, |
| 229 num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme, |
| 230 spec_len, parsed); |
| 231 } |
| 232 |
| 233 } // namespace |
| 234 |
| 235 void ParseFileURL(const char* url, int url_len, Parsed* parsed) { |
| 236 DoParseFileURL(url, url_len, parsed); |
| 237 } |
| 238 |
| 239 void ParseFileURL(const char16* url, int url_len, Parsed* parsed) { |
| 240 DoParseFileURL(url, url_len, parsed); |
| 241 } |
| 242 |
| 243 } // namespace url_parse |
OLD | NEW |