| OLD | NEW |
| 1 // Copyright 2011, Google Inc. | 1 // Copyright 2011, Google Inc. |
| 2 // All rights reserved. | 2 // All rights reserved. |
| 3 // | 3 // |
| 4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
| 5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
| 6 // met: | 6 // met: |
| 7 // | 7 // |
| 8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
| 9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
| 10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
| 11 // copyright notice, this list of conditions and the following disclaimer | 11 // copyright notice, this list of conditions and the following disclaimer |
| 12 // in the documentation and/or other materials provided with the | 12 // in the documentation and/or other materials provided with the |
| 13 // distribution. | 13 // distribution. |
| 14 // * Neither the name of Google Inc. nor the names of its | 14 // * Neither the name of Google Inc. nor the names of its |
| 15 // contributors may be used to endorse or promote products derived from | 15 // contributors may be used to endorse or promote products derived from |
| 16 // this software without specific prior written permission. | 16 // this software without specific prior written permission. |
| 17 // | 17 // |
| 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | 29 |
| 30 #ifndef URL_URL_CANON_INTERNAL_H_ |
| 31 #define URL_URL_CANON_INTERNAL_H_ |
| 32 |
| 30 // This file is intended to be included in another C++ file where the character | 33 // This file is intended to be included in another C++ file where the character |
| 31 // types are defined. This allows us to write mostly generic code, but not have | 34 // types are defined. This allows us to write mostly generic code, but not have |
| 32 // template bloat because everything is inlined when anybody calls any of our | 35 // template bloat because everything is inlined when anybody calls any of our |
| 33 // functions. | 36 // functions. |
| 34 | 37 |
| 35 #ifndef GOOGLEURL_SRC_URL_CANON_INTERNAL_H__ | |
| 36 #define GOOGLEURL_SRC_URL_CANON_INTERNAL_H__ | |
| 37 | |
| 38 #include <stdlib.h> | 38 #include <stdlib.h> |
| 39 | 39 |
| 40 #include "base/logging.h" | 40 #include "base/logging.h" |
| 41 #include "googleurl/src/url_canon.h" | 41 #include "url/url_canon.h" |
| 42 | 42 |
| 43 namespace url_canon { | 43 namespace url_canon { |
| 44 | 44 |
| 45 // Character type handling ----------------------------------------------------- | 45 // Character type handling ----------------------------------------------------- |
| 46 | 46 |
| 47 // Bits that identify different character types. These types identify different | 47 // Bits that identify different character types. These types identify different |
| 48 // bits that are set for each 8-bit character in the kSharedCharTypeTable. | 48 // bits that are set for each 8-bit character in the kSharedCharTypeTable. |
| 49 enum SharedCharTypes { | 49 enum SharedCharTypes { |
| 50 // Characters that do not require escaping in queries. Characters that do | 50 // Characters that do not require escaping in queries. Characters that do |
| 51 // not have this flag will be escaped; see url_canon_query.cc | 51 // not have this flag will be escaped; see url_canon_query.cc |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 101 // match the given |type| in SharedCharTypes. | 101 // match the given |type| in SharedCharTypes. |
| 102 void AppendStringOfType(const char* source, int length, | 102 void AppendStringOfType(const char* source, int length, |
| 103 SharedCharTypes type, | 103 SharedCharTypes type, |
| 104 CanonOutput* output); | 104 CanonOutput* output); |
| 105 void AppendStringOfType(const char16* source, int length, | 105 void AppendStringOfType(const char16* source, int length, |
| 106 SharedCharTypes type, | 106 SharedCharTypes type, |
| 107 CanonOutput* output); | 107 CanonOutput* output); |
| 108 | 108 |
| 109 // Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit | 109 // Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit |
| 110 // that will be used to represent it. | 110 // that will be used to represent it. |
| 111 GURL_API extern const char kHexCharLookup[0x10]; | 111 extern const char kHexCharLookup[0x10]; |
| 112 | 112 |
| 113 // This lookup table allows fast conversion between ASCII hex letters and their | 113 // This lookup table allows fast conversion between ASCII hex letters and their |
| 114 // corresponding numerical value. The 8-bit range is divided up into 8 | 114 // corresponding numerical value. The 8-bit range is divided up into 8 |
| 115 // regions of 0x20 characters each. Each of the three character types (numbers, | 115 // regions of 0x20 characters each. Each of the three character types (numbers, |
| 116 // uppercase, lowercase) falls into different regions of this range. The table | 116 // uppercase, lowercase) falls into different regions of this range. The table |
| 117 // contains the amount to subtract from characters in that range to get at | 117 // contains the amount to subtract from characters in that range to get at |
| 118 // the corresponding numerical value. | 118 // the corresponding numerical value. |
| 119 // | 119 // |
| 120 // See HexDigitToValue for the lookup. | 120 // See HexDigitToValue for the lookup. |
| 121 extern const char kCharToHexLookup[8]; | 121 extern const char kCharToHexLookup[8]; |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 168 // Reads one character in UTF-8 starting at |*begin| in |str| and places | 168 // Reads one character in UTF-8 starting at |*begin| in |str| and places |
| 169 // the decoded value into |*code_point_out|. If the character is valid, we | 169 // the decoded value into |*code_point_out|. If the character is valid, we |
| 170 // will return true. If invalid, we'll return false and put the | 170 // will return true. If invalid, we'll return false and put the |
| 171 // kUnicodeReplacementCharacter into |*code_point_out|. | 171 // kUnicodeReplacementCharacter into |*code_point_out|. |
| 172 // | 172 // |
| 173 // |*begin| will be updated to point to the last character consumed so it | 173 // |*begin| will be updated to point to the last character consumed so it |
| 174 // can be incremented in a loop and will be ready for the next character. | 174 // can be incremented in a loop and will be ready for the next character. |
| 175 // (for a single-byte ASCII character, it will not be changed). | 175 // (for a single-byte ASCII character, it will not be changed). |
| 176 // | 176 // |
| 177 // Implementation is in url_canon_icu.cc. | 177 // Implementation is in url_canon_icu.cc. |
| 178 GURL_API bool ReadUTFChar(const char* str, int* begin, int length, | 178 bool ReadUTFChar(const char* str, int* begin, int length, |
| 179 unsigned* code_point_out); | 179 unsigned* code_point_out); |
| 180 | 180 |
| 181 // Generic To-UTF-8 converter. This will call the given append method for each | 181 // Generic To-UTF-8 converter. This will call the given append method for each |
| 182 // character that should be appended, with the given output method. Wrappers | 182 // character that should be appended, with the given output method. Wrappers |
| 183 // are provided below for escaped and non-escaped versions of this. | 183 // are provided below for escaped and non-escaped versions of this. |
| 184 // | 184 // |
| 185 // The char_value must have already been checked that it's a valid Unicode | 185 // The char_value must have already been checked that it's a valid Unicode |
| 186 // character. | 186 // character. |
| 187 template<class Output, void Appender(unsigned char, Output*)> | 187 template<class Output, void Appender(unsigned char, Output*)> |
| 188 inline void DoAppendUTF8(unsigned char_value, Output* output) { | 188 inline void DoAppendUTF8(unsigned char_value, Output* output) { |
| 189 if (char_value <= 0x7f) { | 189 if (char_value <= 0x7f) { |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 245 // Reads one character in UTF-16 starting at |*begin| in |str| and places | 245 // Reads one character in UTF-16 starting at |*begin| in |str| and places |
| 246 // the decoded value into |*code_point|. If the character is valid, we will | 246 // the decoded value into |*code_point|. If the character is valid, we will |
| 247 // return true. If invalid, we'll return false and put the | 247 // return true. If invalid, we'll return false and put the |
| 248 // kUnicodeReplacementCharacter into |*code_point|. | 248 // kUnicodeReplacementCharacter into |*code_point|. |
| 249 // | 249 // |
| 250 // |*begin| will be updated to point to the last character consumed so it | 250 // |*begin| will be updated to point to the last character consumed so it |
| 251 // can be incremented in a loop and will be ready for the next character. | 251 // can be incremented in a loop and will be ready for the next character. |
| 252 // (for a single-16-bit-word character, it will not be changed). | 252 // (for a single-16-bit-word character, it will not be changed). |
| 253 // | 253 // |
| 254 // Implementation is in url_canon_icu.cc. | 254 // Implementation is in url_canon_icu.cc. |
| 255 GURL_API bool ReadUTFChar(const char16* str, int* begin, int length, | 255 bool ReadUTFChar(const char16* str, int* begin, int length, |
| 256 unsigned* code_point); | 256 unsigned* code_point); |
| 257 | 257 |
| 258 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. | 258 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. |
| 259 inline void AppendUTF16Value(unsigned code_point, | 259 inline void AppendUTF16Value(unsigned code_point, |
| 260 CanonOutputT<char16>* output) { | 260 CanonOutputT<char16>* output) { |
| 261 if (code_point > 0xffff) { | 261 if (code_point > 0xffff) { |
| 262 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0)); | 262 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0)); |
| 263 output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00)); | 263 output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00)); |
| 264 } else { | 264 } else { |
| 265 output->push_back(static_cast<char16>(code_point)); | 265 output->push_back(static_cast<char16>(code_point)); |
| 266 } | 266 } |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 364 // Misc canonicalization helpers ---------------------------------------------- | 364 // Misc canonicalization helpers ---------------------------------------------- |
| 365 | 365 |
| 366 // Converts between UTF-8 and UTF-16, returning true on successful conversion. | 366 // Converts between UTF-8 and UTF-16, returning true on successful conversion. |
| 367 // The output will be appended to the given canonicalizer output (so make sure | 367 // The output will be appended to the given canonicalizer output (so make sure |
| 368 // it's empty if you want to replace). | 368 // it's empty if you want to replace). |
| 369 // | 369 // |
| 370 // On invalid input, this will still write as much output as possible, | 370 // On invalid input, this will still write as much output as possible, |
| 371 // replacing the invalid characters with the "invalid character". It will | 371 // replacing the invalid characters with the "invalid character". It will |
| 372 // return false in the failure case, and the caller should not continue as | 372 // return false in the failure case, and the caller should not continue as |
| 373 // normal. | 373 // normal. |
| 374 GURL_API bool ConvertUTF16ToUTF8(const char16* input, int input_len, | 374 bool ConvertUTF16ToUTF8(const char16* input, int input_len, |
| 375 CanonOutput* output); | 375 CanonOutput* output); |
| 376 GURL_API bool ConvertUTF8ToUTF16(const char* input, int input_len, | 376 bool ConvertUTF8ToUTF16(const char* input, int input_len, |
| 377 CanonOutputT<char16>* output); | 377 CanonOutputT<char16>* output); |
| 378 | 378 |
| 379 // Converts from UTF-16 to 8-bit using the character set converter. If the | 379 // Converts from UTF-16 to 8-bit using the character set converter. If the |
| 380 // converter is NULL, this will use UTF-8. | 380 // converter is NULL, this will use UTF-8. |
| 381 void ConvertUTF16ToQueryEncoding(const char16* input, | 381 void ConvertUTF16ToQueryEncoding(const char16* input, |
| 382 const url_parse::Component& query, | 382 const url_parse::Component& query, |
| 383 CharsetConverter* converter, | 383 CharsetConverter* converter, |
| 384 CanonOutput* output); | 384 CanonOutput* output); |
| 385 | 385 |
| 386 // Applies the replacements to the given component source. The component source | 386 // Applies the replacements to the given component source. The component source |
| 387 // should be pre-initialized to the "old" base. That is, all pointers will | 387 // should be pre-initialized to the "old" base. That is, all pointers will |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 426 int path_begin_in_output, | 426 int path_begin_in_output, |
| 427 CanonOutput* output); | 427 CanonOutput* output); |
| 428 bool CanonicalizePartialPath(const char16* spec, | 428 bool CanonicalizePartialPath(const char16* spec, |
| 429 const url_parse::Component& path, | 429 const url_parse::Component& path, |
| 430 int path_begin_in_output, | 430 int path_begin_in_output, |
| 431 CanonOutput* output); | 431 CanonOutput* output); |
| 432 | 432 |
| 433 #ifndef WIN32 | 433 #ifndef WIN32 |
| 434 | 434 |
| 435 // Implementations of Windows' int-to-string conversions | 435 // Implementations of Windows' int-to-string conversions |
| 436 GURL_API int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix); | 436 int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix); |
| 437 GURL_API int _itow_s(int value, char16* buffer, size_t size_in_chars, | 437 int _itow_s(int value, char16* buffer, size_t size_in_chars, |
| 438 int radix); | 438 int radix); |
| 439 | 439 |
| 440 // Secure template overloads for these functions | 440 // Secure template overloads for these functions |
| 441 template<size_t N> | 441 template<size_t N> |
| 442 inline int _itoa_s(int value, char (&buffer)[N], int radix) { | 442 inline int _itoa_s(int value, char (&buffer)[N], int radix) { |
| 443 return _itoa_s(value, buffer, N, radix); | 443 return _itoa_s(value, buffer, N, radix); |
| 444 } | 444 } |
| 445 | 445 |
| 446 template<size_t N> | 446 template<size_t N> |
| 447 inline int _itow_s(int value, char16 (&buffer)[N], int radix) { | 447 inline int _itow_s(int value, char16 (&buffer)[N], int radix) { |
| 448 return _itow_s(value, buffer, N, radix); | 448 return _itow_s(value, buffer, N, radix); |
| 449 } | 449 } |
| 450 | 450 |
| 451 // _strtoui64 and strtoull behave the same | 451 // _strtoui64 and strtoull behave the same |
| 452 inline unsigned long long _strtoui64(const char* nptr, | 452 inline unsigned long long _strtoui64(const char* nptr, |
| 453 char** endptr, int base) { | 453 char** endptr, int base) { |
| 454 return strtoull(nptr, endptr, base); | 454 return strtoull(nptr, endptr, base); |
| 455 } | 455 } |
| 456 | 456 |
| 457 #endif // WIN32 | 457 #endif // WIN32 |
| 458 | 458 |
| 459 } // namespace url_canon | 459 } // namespace url_canon |
| 460 | 460 |
| 461 #endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_H__ | 461 #endif // URL_URL_CANON_INTERNAL_H_ |
| OLD | NEW |