| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef URL_URL_CANON_INTERNAL_H_ | 5 #ifndef URL_URL_CANON_INTERNAL_H_ |
| 6 #define URL_URL_CANON_INTERNAL_H_ | 6 #define URL_URL_CANON_INTERNAL_H_ |
| 7 | 7 |
| 8 // This file is intended to be included in another C++ file where the character | 8 // This file is intended to be included in another C++ file where the character |
| 9 // types are defined. This allows us to write mostly generic code, but not have | 9 // types are defined. This allows us to write mostly generic code, but not have |
| 10 // template bloat because everything is inlined when anybody calls any of our | 10 // template bloat because everything is inlined when anybody calls any of our |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 76 // match the given |type| in SharedCharTypes. | 76 // match the given |type| in SharedCharTypes. |
| 77 void AppendStringOfType(const char* source, int length, | 77 void AppendStringOfType(const char* source, int length, |
| 78 SharedCharTypes type, | 78 SharedCharTypes type, |
| 79 CanonOutput* output); | 79 CanonOutput* output); |
| 80 void AppendStringOfType(const char16* source, int length, | 80 void AppendStringOfType(const char16* source, int length, |
| 81 SharedCharTypes type, | 81 SharedCharTypes type, |
| 82 CanonOutput* output); | 82 CanonOutput* output); |
| 83 | 83 |
| 84 // Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit | 84 // Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit |
| 85 // that will be used to represent it. | 85 // that will be used to represent it. |
| 86 extern const char kHexCharLookup[0x10]; | 86 URL_EXPORT extern const char kHexCharLookup[0x10]; |
| 87 | 87 |
| 88 // This lookup table allows fast conversion between ASCII hex letters and their | 88 // This lookup table allows fast conversion between ASCII hex letters and their |
| 89 // corresponding numerical value. The 8-bit range is divided up into 8 | 89 // corresponding numerical value. The 8-bit range is divided up into 8 |
| 90 // regions of 0x20 characters each. Each of the three character types (numbers, | 90 // regions of 0x20 characters each. Each of the three character types (numbers, |
| 91 // uppercase, lowercase) falls into different regions of this range. The table | 91 // uppercase, lowercase) falls into different regions of this range. The table |
| 92 // contains the amount to subtract from characters in that range to get at | 92 // contains the amount to subtract from characters in that range to get at |
| 93 // the corresponding numerical value. | 93 // the corresponding numerical value. |
| 94 // | 94 // |
| 95 // See HexDigitToValue for the lookup. | 95 // See HexDigitToValue for the lookup. |
| 96 extern const char kCharToHexLookup[8]; | 96 extern const char kCharToHexLookup[8]; |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 143 // Reads one character in UTF-8 starting at |*begin| in |str| and places | 143 // Reads one character in UTF-8 starting at |*begin| in |str| and places |
| 144 // the decoded value into |*code_point_out|. If the character is valid, we | 144 // the decoded value into |*code_point_out|. If the character is valid, we |
| 145 // will return true. If invalid, we'll return false and put the | 145 // will return true. If invalid, we'll return false and put the |
| 146 // kUnicodeReplacementCharacter into |*code_point_out|. | 146 // kUnicodeReplacementCharacter into |*code_point_out|. |
| 147 // | 147 // |
| 148 // |*begin| will be updated to point to the last character consumed so it | 148 // |*begin| will be updated to point to the last character consumed so it |
| 149 // can be incremented in a loop and will be ready for the next character. | 149 // can be incremented in a loop and will be ready for the next character. |
| 150 // (For a single-byte ASCII character, it will not be changed.) | 150 // (For a single-byte ASCII character, it will not be changed.) |
| 151 // | 151 // |
| 152 // Implementation is in url_canon_icu.cc. | 152 // Implementation is in url_canon_icu.cc. |
| 153 bool ReadUTFChar(const char* str, int* begin, int length, | 153 URL_EXPORT bool ReadUTFChar(const char* str, int* begin, int length, |
| 154 unsigned* code_point_out); | 154 unsigned* code_point_out); |
| 155 | 155 |
| 156 // Generic To-UTF-8 converter. This will call the given append method for each | 156 // Generic To-UTF-8 converter. This will call the given append method for each |
| 157 // character that should be appended, with the given output method. Wrappers | 157 // character that should be appended, with the given output method. Wrappers |
| 158 // are provided below for escaped and non-escaped versions of this. | 158 // are provided below for escaped and non-escaped versions of this. |
| 159 // | 159 // |
| 160 // The char_value must have already been checked that it's a valid Unicode | 160 // The char_value must have already been checked that it's a valid Unicode |
| 161 // character. | 161 // character. |
| 162 template<class Output, void Appender(unsigned char, Output*)> | 162 template<class Output, void Appender(unsigned char, Output*)> |
| 163 inline void DoAppendUTF8(unsigned char_value, Output* output) { | 163 inline void DoAppendUTF8(unsigned char_value, Output* output) { |
| 164 if (char_value <= 0x7f) { | 164 if (char_value <= 0x7f) { |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 220 // Reads one character in UTF-16 starting at |*begin| in |str| and places | 220 // Reads one character in UTF-16 starting at |*begin| in |str| and places |
| 221 // the decoded value into |*code_point|. If the character is valid, we will | 221 // the decoded value into |*code_point|. If the character is valid, we will |
| 222 // return true. If invalid, we'll return false and put the | 222 // return true. If invalid, we'll return false and put the |
| 223 // kUnicodeReplacementCharacter into |*code_point|. | 223 // kUnicodeReplacementCharacter into |*code_point|. |
| 224 // | 224 // |
| 225 // |*begin| will be updated to point to the last character consumed so it | 225 // |*begin| will be updated to point to the last character consumed so it |
| 226 // can be incremented in a loop and will be ready for the next character. | 226 // can be incremented in a loop and will be ready for the next character. |
| 227 // (For a single-16-bit-word character, it will not be changed.) | 227 // (For a single-16-bit-word character, it will not be changed.) |
| 228 // | 228 // |
| 229 // Implementation is in url_canon_icu.cc. | 229 // Implementation is in url_canon_icu.cc. |
| 230 bool ReadUTFChar(const char16* str, int* begin, int length, | 230 URL_EXPORT bool ReadUTFChar(const char16* str, int* begin, int length, |
| 231 unsigned* code_point); | 231 unsigned* code_point); |
| 232 | 232 |
| 233 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. | 233 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. |
| 234 inline void AppendUTF16Value(unsigned code_point, | 234 inline void AppendUTF16Value(unsigned code_point, |
| 235 CanonOutputT<char16>* output) { | 235 CanonOutputT<char16>* output) { |
| 236 if (code_point > 0xffff) { | 236 if (code_point > 0xffff) { |
| 237 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0)); | 237 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0)); |
| 238 output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00)); | 238 output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00)); |
| 239 } else { | 239 } else { |
| 240 output->push_back(static_cast<char16>(code_point)); | 240 output->push_back(static_cast<char16>(code_point)); |
| 241 } | 241 } |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 339 // Misc canonicalization helpers ---------------------------------------------- | 339 // Misc canonicalization helpers ---------------------------------------------- |
| 340 | 340 |
| 341 // Converts between UTF-8 and UTF-16, returning true on successful conversion. | 341 // Converts between UTF-8 and UTF-16, returning true on successful conversion. |
| 342 // The output will be appended to the given canonicalizer output (so make sure | 342 // The output will be appended to the given canonicalizer output (so make sure |
| 343 // it's empty if you want to replace). | 343 // it's empty if you want to replace). |
| 344 // | 344 // |
| 345 // On invalid input, this will still write as much output as possible, | 345 // On invalid input, this will still write as much output as possible, |
| 346 // replacing the invalid characters with the "invalid character". It will | 346 // replacing the invalid characters with the "invalid character". It will |
| 347 // return false in the failure case, and the caller should not continue as | 347 // return false in the failure case, and the caller should not continue as |
| 348 // normal. | 348 // normal. |
| 349 bool ConvertUTF16ToUTF8(const char16* input, int input_len, | 349 URL_EXPORT bool ConvertUTF16ToUTF8(const char16* input, int input_len, |
| 350 CanonOutput* output); | 350 CanonOutput* output); |
| 351 bool ConvertUTF8ToUTF16(const char* input, int input_len, | 351 URL_EXPORT bool ConvertUTF8ToUTF16(const char* input, int input_len, |
| 352 CanonOutputT<char16>* output); | 352 CanonOutputT<char16>* output); |
| 353 | 353 |
| 354 // Converts from UTF-16 to 8-bit using the character set converter. If the | 354 // Converts from UTF-16 to 8-bit using the character set converter. If the |
| 355 // converter is NULL, this will use UTF-8. | 355 // converter is NULL, this will use UTF-8. |
| 356 void ConvertUTF16ToQueryEncoding(const char16* input, | 356 void ConvertUTF16ToQueryEncoding(const char16* input, |
| 357 const url_parse::Component& query, | 357 const url_parse::Component& query, |
| 358 CharsetConverter* converter, | 358 CharsetConverter* converter, |
| 359 CanonOutput* output); | 359 CanonOutput* output); |
| 360 | 360 |
| 361 // Applies the replacements to the given component source. The component source | 361 // Applies the replacements to the given component source. The component source |
| 362 // should be pre-initialized to the "old" base. That is, all pointers will | 362 // should be pre-initialized to the "old" base. That is, all pointers will |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 401 int path_begin_in_output, | 401 int path_begin_in_output, |
| 402 CanonOutput* output); | 402 CanonOutput* output); |
| 403 bool CanonicalizePartialPath(const char16* spec, | 403 bool CanonicalizePartialPath(const char16* spec, |
| 404 const url_parse::Component& path, | 404 const url_parse::Component& path, |
| 405 int path_begin_in_output, | 405 int path_begin_in_output, |
| 406 CanonOutput* output); | 406 CanonOutput* output); |
| 407 | 407 |
| 408 #ifndef WIN32 | 408 #ifndef WIN32 |
| 409 | 409 |
| 410 // Implementations of Windows' int-to-string conversions | 410 // Implementations of Windows' int-to-string conversions |
| 411 int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix); | 411 URL_EXPORT int _itoa_s(int value, char* buffer, size_t size_in_chars, |
| 412 int _itow_s(int value, char16* buffer, size_t size_in_chars, | 412 int radix); |
| 413 int radix); | 413 URL_EXPORT int _itow_s(int value, char16* buffer, size_t size_in_chars, |
| 414 int radix); |
| 414 | 415 |
| 415 // Secure template overloads for these functions | 416 // Secure template overloads for these functions |
| 416 template<size_t N> | 417 template<size_t N> |
| 417 inline int _itoa_s(int value, char (&buffer)[N], int radix) { | 418 inline int _itoa_s(int value, char (&buffer)[N], int radix) { |
| 418 return _itoa_s(value, buffer, N, radix); | 419 return _itoa_s(value, buffer, N, radix); |
| 419 } | 420 } |
| 420 | 421 |
| 421 template<size_t N> | 422 template<size_t N> |
| 422 inline int _itow_s(int value, char16 (&buffer)[N], int radix) { | 423 inline int _itow_s(int value, char16 (&buffer)[N], int radix) { |
| 423 return _itow_s(value, buffer, N, radix); | 424 return _itow_s(value, buffer, N, radix); |
| 424 } | 425 } |
| 425 | 426 |
| 426 // _strtoui64 and strtoull behave the same | 427 // _strtoui64 and strtoull behave the same |
| 427 inline unsigned long long _strtoui64(const char* nptr, | 428 inline unsigned long long _strtoui64(const char* nptr, |
| 428 char** endptr, int base) { | 429 char** endptr, int base) { |
| 429 return strtoull(nptr, endptr, base); | 430 return strtoull(nptr, endptr, base); |
| 430 } | 431 } |
| 431 | 432 |
| 432 #endif // WIN32 | 433 #endif // WIN32 |
| 433 | 434 |
| 434 } // namespace url_canon | 435 } // namespace url_canon |
| 435 | 436 |
| 436 #endif // URL_URL_CANON_INTERNAL_H_ | 437 #endif // URL_URL_CANON_INTERNAL_H_ |
| OLD | NEW |