OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/logging.h" | 5 #include "base/logging.h" |
6 #include "url/url_canon.h" | 6 #include "url/url_canon.h" |
7 #include "url/url_canon_internal.h" | 7 #include "url/url_canon_internal.h" |
8 #include "url/url_parse_internal.h" | 8 #include "url/url_parse_internal.h" |
9 | 9 |
10 namespace url { | 10 namespace url { |
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
166 // starts with a slash, it should be copied to the output. If no path has | 166 // starts with a slash, it should be copied to the output. If no path has |
167 // already been appended to the output (the case when not resolving | 167 // already been appended to the output (the case when not resolving |
168 // relative URLs), the path should begin with a slash. | 168 // relative URLs), the path should begin with a slash. |
169 // | 169 // |
170 // If there are already path components (this mode is used when appending | 170 // If there are already path components (this mode is used when appending |
171 // relative paths for resolving), it assumes that the output already has | 171 // relative paths for resolving), it assumes that the output already has |
172 // a trailing slash and that if the input begins with a slash, it should be | 172 // a trailing slash and that if the input begins with a slash, it should be |
173 // copied to the output. | 173 // copied to the output. |
174 // | 174 // |
175 // We do not collapse multiple slashes in a row to a single slash. It seems | 175 // We do not collapse multiple slashes in a row to a single slash. It seems |
176 // no web browsers do this, and we don't want incompababilities, even though | 176 // no web browsers do this, and we don't want incompatibilities, even though |
177 // it would be correct for most systems. | 177 // it would be correct for most systems. |
178 template<typename CHAR, typename UCHAR> | 178 template<typename CHAR, typename UCHAR> |
179 bool DoPartialPath(const CHAR* spec, | 179 bool DoPartialPath(const CHAR* spec, |
180 const Component& path, | 180 const Component& path, |
181 int path_begin_in_output, | 181 int path_begin_in_output, |
182 CanonOutput* output) { | 182 CanonOutput* output) { |
183 int end = path.end(); | 183 int end = path.end(); |
184 | 184 |
185 bool success = true; | 185 bool success = true; |
186 for (int i = path.begin; i < end; i++) { | 186 for (int i = path.begin; i < end; i++) { |
187 UCHAR uch = static_cast<UCHAR>(spec[i]); | 187 UCHAR uch = static_cast<UCHAR>(spec[i]); |
188 if (sizeof(CHAR) > sizeof(char) && uch >= 0x80) { | 188 if (sizeof(CHAR) > sizeof(char) && uch >= 0x80) { |
189 // We only need to test wide input for having non-ASCII characters. For | 189 // We only need to test wide input for having non-ASCII characters. For |
190 // narrow input, we'll always just use the lookup table. We don't try to | 190 // narrow input, we'll always just use the lookup table. We don't try to |
191 // do anything tricky with decoding/validating UTF-8. This function will | 191 // do anything tricky with decoding/validating UTF-8. This function will |
192 // read one or two UTF-16 characters and append the output as UTF-8. This | 192 // read one or two UTF-16 characters and append the output as UTF-8. This |
193 // call will be removed in 8-bit mode. | 193 // call will be removed in 8-bit mode. |
194 success &= AppendUTF8EscapedChar(spec, &i, end, output); | 194 success &= AppendUTF8EscapedChar(spec, &i, end, output); |
195 } else { | 195 } else { |
196 // Normal ASCII character or 8-bit input, use the lookup table. | 196 // Normal ASCII character or 8-bit input, use the lookup table. |
197 unsigned char out_ch = static_cast<unsigned char>(uch); | 197 unsigned char out_ch = static_cast<unsigned char>(uch); |
198 unsigned char flags = kPathCharLookup[out_ch]; | 198 unsigned char flags = kPathCharLookup[out_ch]; |
199 if (flags & SPECIAL) { | 199 if (flags & SPECIAL) { |
200 // Needs special handling of some sort. | 200 // Needs special handling of some sort. |
201 int dotlen; | 201 int dotlen; |
202 if ((dotlen = IsDot(spec, i, end)) > 0) { | 202 if ((dotlen = IsDot(spec, i, end)) > 0) { |
203 // See if this dot was preceeded by a slash in the output. We | 203 // See if this dot was preceded by a slash in the output. We |
204 // assume that when canonicalizing paths, they will always | 204 // assume that when canonicalizing paths, they will always |
205 // start with a slash and not a dot, so we don't have to | 205 // start with a slash and not a dot, so we don't have to |
206 // bounds check the output. | 206 // bounds check the output. |
207 // | 207 // |
208 // Note that we check this in the case of dots so we don't have to | 208 // Note that we check this in the case of dots so we don't have to |
209 // special case slashes. Since slashes are much more common than | 209 // special case slashes. Since slashes are much more common than |
210 // dots, this actually increases performance measurably (though | 210 // dots, this actually increases performance measurably (though |
211 // slightly). | 211 // slightly). |
212 DCHECK(output->length() > path_begin_in_output); | 212 DCHECK(output->length() > path_begin_in_output); |
213 if (output->length() > path_begin_in_output && | 213 if (output->length() > path_begin_in_output && |
214 output->at(output->length() - 1) == '/') { | 214 output->at(output->length() - 1) == '/') { |
215 // Slash followed by a dot; check whether this is a "." or ".." directory. | 215 // Slash followed by a dot; check whether this is a "." or ".." directory. |
216 int consumed_len; | 216 int consumed_len; |
217 switch (ClassifyAfterDot<CHAR>(spec, i + dotlen, end, | 217 switch (ClassifyAfterDot<CHAR>(spec, i + dotlen, end, |
218 &consumed_len)) { | 218 &consumed_len)) { |
219 case NOT_A_DIRECTORY: | 219 case NOT_A_DIRECTORY: |
220 // Copy the dot to the output, it means nothing special. | 220 // Copy the dot to the output, it means nothing special. |
221 output->push_back('.'); | 221 output->push_back('.'); |
222 i += dotlen - 1; | 222 i += dotlen - 1; |
223 break; | 223 break; |
224 case DIRECTORY_CUR: // Current directory, just skip the input. | 224 case DIRECTORY_CUR: // Current directory, just skip the input. |
225 i += dotlen + consumed_len - 1; | 225 i += dotlen + consumed_len - 1; |
226 break; | 226 break; |
227 case DIRECTORY_UP: | 227 case DIRECTORY_UP: |
228 BackUpToPreviousSlash(path_begin_in_output, output); | 228 BackUpToPreviousSlash(path_begin_in_output, output); |
229 i += dotlen + consumed_len - 1; | 229 i += dotlen + consumed_len - 1; |
230 break; | 230 break; |
231 } | 231 } |
232 } else { | 232 } else { |
233 // This dot is not preceeded by a slash, it is just part of some | 233 // This dot is not preceded by a slash, it is just part of some |
234 // file name. | 234 // file name. |
235 output->push_back('.'); | 235 output->push_back('.'); |
236 i += dotlen - 1; | 236 i += dotlen - 1; |
237 } | 237 } |
238 | 238 |
239 } else if (out_ch == '\\') { | 239 } else if (out_ch == '\\') { |
240 // Convert backslashes to forward slashes | 240 // Convert backslashes to forward slashes |
241 output->push_back('/'); | 241 output->push_back('/'); |
242 | 242 |
243 } else if (out_ch == '%') { | 243 } else if (out_ch == '%') { |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// CanonicalizePartialPath, base::char16 overload.
//
// Thin forwarding wrapper: instantiates DoPartialPath with both CHAR and UCHAR
// set to base::char16 and returns its result unchanged. |spec|, |path|,
// |path_begin_in_output| and |output| are passed through as-is; this function
// adds no validation of its own.
//
// NOTE(review): in DoPartialPath, code units >= 0x80 are routed to
// AppendUTF8EscapedChar only when sizeof(CHAR) > sizeof(char); this overload
// presumably relies on base::char16 being wider than char -- confirm.
344 bool CanonicalizePartialPath(const base::char16* spec, | 344 bool CanonicalizePartialPath(const base::char16* spec, |
345 const Component& path, | 345 const Component& path, |
346 int path_begin_in_output, | 346 int path_begin_in_output, |
347 CanonOutput* output) { | 347 CanonOutput* output) { |
348 return DoPartialPath<base::char16, base::char16>(spec, path, | 348 return DoPartialPath<base::char16, base::char16>(spec, path, |
349 path_begin_in_output, | 349 path_begin_in_output, |
350 output); | 350 output); |
351 } | 351 } |
352 | 352 |
353 } // namespace url | 353 } // namespace url |
OLD | NEW |