| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Canonicalizers for random bits that aren't big enough for their own files. | 5 // Canonicalizers for random bits that aren't big enough for their own files. |
| 6 | 6 |
| 7 #include <string.h> | 7 #include <string.h> |
| 8 | 8 |
| 9 #include "url/url_canon.h" | 9 #include "url/url_canon.h" |
| 10 #include "url/url_canon_internal.h" | 10 #include "url/url_canon_internal.h" |
| 11 | 11 |
| 12 namespace url { | 12 namespace url { |
| 13 | 13 |
| 14 namespace { | 14 namespace { |
| 15 | 15 |
| 16 // Returns true if the given character should be removed from the middle of a | 16 // Returns true if the given character should be removed from the middle of a |
| 17 // URL. | 17 // URL. |
| 18 inline bool IsRemovableURLWhitespace(int ch) { | 18 inline bool IsRemovableURLWhitespace(int ch) { |
| 19 return ch == '\r' || ch == '\n' || ch == '\t'; | 19 return ch == '\r' || ch == '\n' || ch == '\t'; |
| 20 } | 20 } |
| 21 | 21 |
| 22 // Backend for RemoveURLWhitespace (see declaration in url_canon.h). | 22 // Backend for RemoveURLWhitespace (see declaration in url_canon.h). |
| 23 // It sucks that we have to do this, since this takes about 13% of the total URL | 23 // It sucks that we have to do this, since this takes about 13% of the total URL |
| 24 // canonicalization time. | 24 // canonicalization time. |
| 25 template<typename CHAR> | 25 template <typename CHAR> |
| 26 const CHAR* DoRemoveURLWhitespace(const CHAR* input, int input_len, | 26 const CHAR* DoRemoveURLWhitespace(const CHAR* input, |
| | 27 int input_len, |
| 27 CanonOutputT<CHAR>* buffer, | 28 CanonOutputT<CHAR>* buffer, |
| 28 int* output_len) { | 29 int* output_len, |
| | 30 bool* potentially_dangling_markup) { |
| 29 // Fast verification that there's nothing that needs removal. This is the 99% | 31 // Fast verification that there's nothing that needs removal. This is the 99% |
| 30 // case, so we want it to be fast and don't care about impacting the speed | 32 // case, so we want it to be fast and don't care about impacting the speed |
| 31 // when we do find whitespace. | 33 // when we do find whitespace. |
| 32 int found_whitespace = false; | 34 int found_whitespace = false; |
| 33 for (int i = 0; i < input_len; i++) { | 35 for (int i = 0; i < input_len; i++) { |
| 34 if (!IsRemovableURLWhitespace(input[i])) | 36 if (!IsRemovableURLWhitespace(input[i])) |
| 35 continue; | 37 continue; |
| 36 found_whitespace = true; | 38 found_whitespace = true; |
| 37 break; | 39 break; |
| 38 } | 40 } |
| 39 | 41 |
| 40 if (!found_whitespace) { | 42 if (!found_whitespace) { |
| 41 // Didn't find any whitespace, we don't need to do anything. We can just | 43 // Didn't find any whitespace, we don't need to do anything. We can just |
| 42 // return the input as the output. | 44 // return the input as the output. |
| 43 *output_len = input_len; | 45 *output_len = input_len; |
| 44 return input; | 46 return input; |
| 45 } | 47 } |
| 46 | 48 |
| 47 // Remove the whitespace into the new buffer and return it. | 49 // Remove the whitespace into the new buffer and return it. |
| 48 for (int i = 0; i < input_len; i++) { | 50 for (int i = 0; i < input_len; i++) { |
| 49 if (!IsRemovableURLWhitespace(input[i])) | 51 if (!IsRemovableURLWhitespace(input[i])) { |
| | 52 if (potentially_dangling_markup && input[i] == 0x3C) |
| | 53 *potentially_dangling_markup = true; |
| 50 buffer->push_back(input[i]); | 54 buffer->push_back(input[i]); |
| | 55 } |
| 51 } | 56 } |
| 52 *output_len = buffer->length(); | 57 *output_len = buffer->length(); |
| 53 return buffer->data(); | 58 return buffer->data(); |
| 54 } | 59 } |
| 55 | 60 |
| 56 // Contains the canonical version of each possible input letter in the scheme | 61 // Contains the canonical version of each possible input letter in the scheme |
| 57 // (basically, lower-cased). The corresponding entry will be 0 if the letter | 62 // (basically, lower-cased). The corresponding entry will be 0 if the letter |
| 58 // is not allowed in a scheme. | 63 // is not allowed in a scheme. |
| 59 const char kSchemeCanonical[0x80] = { | 64 const char kSchemeCanonical[0x80] = { |
| 60 // 00-1f: all are invalid | 65 // 00-1f: all are invalid |
| (...skipping 206 matching lines...) | (...skipping 206 matching lines...) |
| 267 ReadUTFChar(spec, &i, end, &code_point); | 272 ReadUTFChar(spec, &i, end, &code_point); |
| 268 AppendUTF8Value(code_point, output); | 273 AppendUTF8Value(code_point, output); |
| 269 } | 274 } |
| 270 } | 275 } |
| 271 | 276 |
| 272 out_ref->len = output->length() - out_ref->begin; | 277 out_ref->len = output->length() - out_ref->begin; |
| 273 } | 278 } |
| 274 | 279 |
| 275 } // namespace | 280 } // namespace |
| 276 | 281 |
| 277 const char* RemoveURLWhitespace(const char* input, int input_len, | 282 const char* RemoveURLWhitespace(const char* input, |
| | 283 int input_len, |
| 278 CanonOutputT<char>* buffer, | 284 CanonOutputT<char>* buffer, |
| 279 int* output_len) { | 285 int* output_len, |
| 280 return DoRemoveURLWhitespace(input, input_len, buffer, output_len); | 286 bool* potentially_dangling_markup) { |
| 287 return DoRemoveURLWhitespace(input, input_len, buffer, output_len, |
| | 288 potentially_dangling_markup); |
| 281 } | 289 } |
| 282 | 290 |
| 283 const base::char16* RemoveURLWhitespace(const base::char16* input, | 291 const base::char16* RemoveURLWhitespace(const base::char16* input, |
| 284 int input_len, | 292 int input_len, |
| 285 CanonOutputT<base::char16>* buffer, | 293 CanonOutputT<base::char16>* buffer, |
| 286 int* output_len) { | 294 int* output_len, |
| 287 return DoRemoveURLWhitespace(input, input_len, buffer, output_len); | 295 bool* potentially_dangling_markup) { |
| 296 return DoRemoveURLWhitespace(input, input_len, buffer, output_len, |
| | 297 potentially_dangling_markup); |
| 288 } | 298 } |
| 289 | 299 |
| 290 char CanonicalSchemeChar(base::char16 ch) { | 300 char CanonicalSchemeChar(base::char16 ch) { |
| 291 if (ch >= 0x80) | 301 if (ch >= 0x80) |
| 292 return 0; // Non-ASCII is not supported by schemes. | 302 return 0; // Non-ASCII is not supported by schemes. |
| 293 return kSchemeCanonical[ch]; | 303 return kSchemeCanonical[ch]; |
| 294 } | 304 } |
| 295 | 305 |
| 296 bool CanonicalizeScheme(const char* spec, | 306 bool CanonicalizeScheme(const char* spec, |
| 297 const Component& scheme, | 307 const Component& scheme, |
| (...skipping 60 matching lines...) | (...skipping 60 matching lines...) |
| 358 } | 368 } |
| 359 | 369 |
| 360 void CanonicalizeRef(const base::char16* spec, | 370 void CanonicalizeRef(const base::char16* spec, |
| 361 const Component& ref, | 371 const Component& ref, |
| 362 CanonOutput* output, | 372 CanonOutput* output, |
| 363 Component* out_ref) { | 373 Component* out_ref) { |
| 364 DoCanonicalizeRef<base::char16, base::char16>(spec, ref, output, out_ref); | 374 DoCanonicalizeRef<base::char16, base::char16>(spec, ref, output, out_ref); |
| 365 } | 375 } |
| 366 | 376 |
| 367 } // namespace url | 377 } // namespace url |
| OLD | NEW |