| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/logging.h" | 5 #include "base/logging.h" |
| 6 #include "url/url_canon.h" | 6 #include "url/url_canon.h" |
| 7 #include "url/url_canon_internal.h" | 7 #include "url/url_canon_internal.h" |
| 8 #include "url/url_parse_internal.h" | 8 #include "url/url_parse_internal.h" |
| 9 | 9 |
| 10 namespace url { | 10 namespace url { |
| (...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 166 // starts with a slash, it should be copied to the output. If no path has | 166 // starts with a slash, it should be copied to the output. If no path has |
| 167 // already been appended to the output (the case when not resolving | 167 // already been appended to the output (the case when not resolving |
| 168 // relative URLs), the path should begin with a slash. | 168 // relative URLs), the path should begin with a slash. |
| 169 // | 169 // |
| 170 // If there are already path components (this mode is used when appending | 170 // If there are already path components (this mode is used when appending |
| 171 // relative paths for resolving), it assumes that the output already has | 171 // relative paths for resolving), it assumes that the output already has |
| 172 // a trailing slash and that if the input begins with a slash, it should be | 172 // a trailing slash and that if the input begins with a slash, it should be |
| 173 // copied to the output. | 173 // copied to the output. |
| 174 // | 174 // |
| 175 // We do not collapse multiple slashes in a row to a single slash. It seems | 175 // We do not collapse multiple slashes in a row to a single slash. It seems |
| 176 // no web browsers do this, and we don't want incompababilities, even though | 176 // no web browsers do this, and we don't want incompatibilities, even though |
| 177 // it would be correct for most systems. | 177 // it would be correct for most systems. |
| 178 template<typename CHAR, typename UCHAR> | 178 template<typename CHAR, typename UCHAR> |
| 179 bool DoPartialPath(const CHAR* spec, | 179 bool DoPartialPath(const CHAR* spec, |
| 180 const Component& path, | 180 const Component& path, |
| 181 int path_begin_in_output, | 181 int path_begin_in_output, |
| 182 CanonOutput* output) { | 182 CanonOutput* output) { |
| 183 int end = path.end(); | 183 int end = path.end(); |
| 184 | 184 |
| 185 bool success = true; | 185 bool success = true; |
| 186 for (int i = path.begin; i < end; i++) { | 186 for (int i = path.begin; i < end; i++) { |
| 187 UCHAR uch = static_cast<UCHAR>(spec[i]); | 187 UCHAR uch = static_cast<UCHAR>(spec[i]); |
| 188 if (sizeof(CHAR) > sizeof(char) && uch >= 0x80) { | 188 if (sizeof(CHAR) > sizeof(char) && uch >= 0x80) { |
| 189 // We only need to test wide input for having non-ASCII characters. For | 189 // We only need to test wide input for having non-ASCII characters. For |
| 190 // narrow input, we'll always just use the lookup table. We don't try to | 190 // narrow input, we'll always just use the lookup table. We don't try to |
| 191 // do anything tricky with decoding/validating UTF-8. This function will | 191 // do anything tricky with decoding/validating UTF-8. This function will |
| 192 // read one or two UTF-16 characters and append the output as UTF-8. This | 192 // read one or two UTF-16 characters and append the output as UTF-8. This |
| 193 // call will be removed in 8-bit mode. | 193 // call will be removed in 8-bit mode. |
| 194 success &= AppendUTF8EscapedChar(spec, &i, end, output); | 194 success &= AppendUTF8EscapedChar(spec, &i, end, output); |
| 195 } else { | 195 } else { |
| 196 // Normal ASCII character or 8-bit input, use the lookup table. | 196 // Normal ASCII character or 8-bit input, use the lookup table. |
| 197 unsigned char out_ch = static_cast<unsigned char>(uch); | 197 unsigned char out_ch = static_cast<unsigned char>(uch); |
| 198 unsigned char flags = kPathCharLookup[out_ch]; | 198 unsigned char flags = kPathCharLookup[out_ch]; |
| 199 if (flags & SPECIAL) { | 199 if (flags & SPECIAL) { |
| 200 // Needs special handling of some sort. | 200 // Needs special handling of some sort. |
| 201 int dotlen; | 201 int dotlen; |
| 202 if ((dotlen = IsDot(spec, i, end)) > 0) { | 202 if ((dotlen = IsDot(spec, i, end)) > 0) { |
| 203 // See if this dot was preceeded by a slash in the output. We | 203 // See if this dot was preceded by a slash in the output. We |
| 204 // assume that when canonicalizing paths, they will always | 204 // assume that when canonicalizing paths, they will always |
| 205 // start with a slash and not a dot, so we don't have to | 205 // start with a slash and not a dot, so we don't have to |
| 206 // bounds check the output. | 206 // bounds check the output. |
| 207 // | 207 // |
| 208 // Note that we check this in the case of dots so we don't have to | 208 // Note that we check this in the case of dots so we don't have to |
| 209 // special case slashes. Since slashes are much more common than | 209 // special case slashes. Since slashes are much more common than |
| 210 // dots, this actually increases performance measurably (though | 210 // dots, this actually increases performance measurably (though |
| 211 // slightly). | 211 // slightly). |
| 212 DCHECK(output->length() > path_begin_in_output); | 212 DCHECK(output->length() > path_begin_in_output); |
| 213 if (output->length() > path_begin_in_output && | 213 if (output->length() > path_begin_in_output && |
| 214 output->at(output->length() - 1) == '/') { | 214 output->at(output->length() - 1) == '/') { |
| 215 // Slash followed by a dot; check to see if this means a relative directory. | 215 // Slash followed by a dot; check to see if this means a relative directory. |
| 216 int consumed_len; | 216 int consumed_len; |
| 217 switch (ClassifyAfterDot<CHAR>(spec, i + dotlen, end, | 217 switch (ClassifyAfterDot<CHAR>(spec, i + dotlen, end, |
| 218 &consumed_len)) { | 218 &consumed_len)) { |
| 219 case NOT_A_DIRECTORY: | 219 case NOT_A_DIRECTORY: |
| 220 // Copy the dot to the output, it means nothing special. | 220 // Copy the dot to the output, it means nothing special. |
| 221 output->push_back('.'); | 221 output->push_back('.'); |
| 222 i += dotlen - 1; | 222 i += dotlen - 1; |
| 223 break; | 223 break; |
| 224 case DIRECTORY_CUR: // Current directory, just skip the input. | 224 case DIRECTORY_CUR: // Current directory, just skip the input. |
| 225 i += dotlen + consumed_len - 1; | 225 i += dotlen + consumed_len - 1; |
| 226 break; | 226 break; |
| 227 case DIRECTORY_UP: | 227 case DIRECTORY_UP: |
| 228 BackUpToPreviousSlash(path_begin_in_output, output); | 228 BackUpToPreviousSlash(path_begin_in_output, output); |
| 229 i += dotlen + consumed_len - 1; | 229 i += dotlen + consumed_len - 1; |
| 230 break; | 230 break; |
| 231 } | 231 } |
| 232 } else { | 232 } else { |
| 233 // This dot is not preceeded by a slash, it is just part of some | 233 // This dot is not preceded by a slash, it is just part of some |
| 234 // file name. | 234 // file name. |
| 235 output->push_back('.'); | 235 output->push_back('.'); |
| 236 i += dotlen - 1; | 236 i += dotlen - 1; |
| 237 } | 237 } |
| 238 | 238 |
| 239 } else if (out_ch == '\\') { | 239 } else if (out_ch == '\\') { |
| 240 // Convert backslashes to forward slashes | 240 // Convert backslashes to forward slashes |
| 241 output->push_back('/'); | 241 output->push_back('/'); |
| 242 | 242 |
| 243 } else if (out_ch == '%') { | 243 } else if (out_ch == '%') { |
| (...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 344 bool CanonicalizePartialPath(const base::char16* spec, | 344 bool CanonicalizePartialPath(const base::char16* spec, |
| 345 const Component& path, | 345 const Component& path, |
| 346 int path_begin_in_output, | 346 int path_begin_in_output, |
| 347 CanonOutput* output) { | 347 CanonOutput* output) { |
| 348 return DoPartialPath<base::char16, base::char16>(spec, path, | 348 return DoPartialPath<base::char16, base::char16>(spec, path, |
| 349 path_begin_in_output, | 349 path_begin_in_output, |
| 350 output); | 350 output); |
| 351 } | 351 } |
| 352 | 352 |
| 353 } // namespace url | 353 } // namespace url |
| OLD | NEW |