OLD | NEW |
1 // Copyright 2011, Google Inc. | 1 // Copyright 2011, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
11 // copyright notice, this list of conditions and the following disclaimer | 11 // copyright notice, this list of conditions and the following disclaimer |
12 // in the documentation and/or other materials provided with the | 12 // in the documentation and/or other materials provided with the |
13 // distribution. | 13 // distribution. |
14 // * Neither the name of Google Inc. nor the names of its | 14 // * Neither the name of Google Inc. nor the names of its |
15 // contributors may be used to endorse or promote products derived from | 15 // contributors may be used to endorse or promote products derived from |
16 // this software without specific prior written permission. | 16 // this software without specific prior written permission. |
17 // | 17 // |
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 |
| 30 #ifndef URL_URL_CANON_INTERNAL_H_ |
| 31 #define URL_URL_CANON_INTERNAL_H_ |
| 32 |
30 // This file is intended to be included in another C++ file where the character | 33 // This file is intended to be included in another C++ file where the character |
31 // types are defined. This allows us to write mostly generic code, but not have | 34 // types are defined. This allows us to write mostly generic code, but not have |
32 // template bloat because everything is inlined when anybody calls any of our | 35 // template bloat because everything is inlined when anybody calls any of our |
33 // functions. | 36 // functions. |
34 | 37 |
35 #ifndef GOOGLEURL_SRC_URL_CANON_INTERNAL_H__ | |
36 #define GOOGLEURL_SRC_URL_CANON_INTERNAL_H__ | |
37 | |
38 #include <stdlib.h> | 38 #include <stdlib.h> |
39 | 39 |
40 #include "base/logging.h" | 40 #include "base/logging.h" |
41 #include "googleurl/src/url_canon.h" | 41 #include "url/url_canon.h" |
42 | 42 |
43 namespace url_canon { | 43 namespace url_canon { |
44 | 44 |
45 // Character type handling ----------------------------------------------------- | 45 // Character type handling ----------------------------------------------------- |
46 | 46 |
47 // Bits that identify different character types. These types identify different | 47 // Bits that identify different character types. These types identify different |
48 // bits that are set for each 8-bit character in the kSharedCharTypeTable. | 48 // bits that are set for each 8-bit character in the kSharedCharTypeTable. |
49 enum SharedCharTypes { | 49 enum SharedCharTypes { |
50 // Characters that do not require escaping in queries. Characters that do | 50 // Characters that do not require escaping in queries. Characters that do |
51 // not have this flag will be escaped; see url_canon_query.cc | 51 // not have this flag will be escaped; see url_canon_query.cc |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
101 // match the given |type| in SharedCharTypes. | 101 // match the given |type| in SharedCharTypes. |
102 void AppendStringOfType(const char* source, int length, | 102 void AppendStringOfType(const char* source, int length, |
103 SharedCharTypes type, | 103 SharedCharTypes type, |
104 CanonOutput* output); | 104 CanonOutput* output); |
105 void AppendStringOfType(const char16* source, int length, | 105 void AppendStringOfType(const char16* source, int length, |
106 SharedCharTypes type, | 106 SharedCharTypes type, |
107 CanonOutput* output); | 107 CanonOutput* output); |
108 | 108 |
109 // Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit | 109 // Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit |
110 // that will be used to represent it. | 110 // that will be used to represent it. |
111 GURL_API extern const char kHexCharLookup[0x10]; | 111 extern const char kHexCharLookup[0x10]; |
112 | 112 |
113 // This lookup table allows fast conversion between ASCII hex letters and their | 113 // This lookup table allows fast conversion between ASCII hex letters and their |
114 // corresponding numerical value. The 8-bit range is divided up into 8 | 114 // corresponding numerical value. The 8-bit range is divided up into 8 |
115 // regions of 0x20 characters each. Each of the three character types (numbers, | 115 // regions of 0x20 characters each. Each of the three character types (numbers, |
116 // uppercase, lowercase) falls into different regions of this range. The table | 116 // uppercase, lowercase) falls into different regions of this range. The table |
117 // contains the amount to subtract from characters in that range to get at | 117 // contains the amount to subtract from characters in that range to get at |
118 // the corresponding numerical value. | 118 // the corresponding numerical value. |
119 // | 119 // |
120 // See HexDigitToValue for the lookup. | 120 // See HexDigitToValue for the lookup. |
121 extern const char kCharToHexLookup[8]; | 121 extern const char kCharToHexLookup[8]; |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
168 // Reads one character in UTF-8 starting at |*begin| in |str| and places | 168 // Reads one character in UTF-8 starting at |*begin| in |str| and places |
169 // the decoded value into |*code_point_out|. If the character is valid, we will | 169 // the decoded value into |*code_point_out|. If the character is valid, we will |
170 // return true. If invalid, we'll return false and put the | 170 // return true. If invalid, we'll return false and put the |
171 // kUnicodeReplacementCharacter into |*code_point_out|. | 171 // kUnicodeReplacementCharacter into |*code_point_out|. |
172 // | 172 // |
173 // |*begin| will be updated to point to the last character consumed so it | 173 // |*begin| will be updated to point to the last character consumed so it |
174 // can be incremented in a loop and will be ready for the next character. | 174 // can be incremented in a loop and will be ready for the next character. |
175 // (for a single-byte ASCII character, it will not be changed). | 175 // (for a single-byte ASCII character, it will not be changed). |
176 // | 176 // |
177 // Implementation is in url_canon_icu.cc. | 177 // Implementation is in url_canon_icu.cc. |
178 GURL_API bool ReadUTFChar(const char* str, int* begin, int length, | 178 bool ReadUTFChar(const char* str, int* begin, int length, |
179 unsigned* code_point_out); | 179 unsigned* code_point_out); |
180 | 180 |
181 // Generic To-UTF-8 converter. This will call the given append method for each | 181 // Generic To-UTF-8 converter. This will call the given append method for each |
182 // character that should be appended, with the given output method. Wrappers | 182 // character that should be appended, with the given output method. Wrappers |
183 // are provided below for escaped and non-escaped versions of this. | 183 // are provided below for escaped and non-escaped versions of this. |
184 // | 184 // |
185 // The char_value must have already been checked that it's a valid Unicode | 185 // The char_value must have already been checked that it's a valid Unicode |
186 // character. | 186 // character. |
187 template<class Output, void Appender(unsigned char, Output*)> | 187 template<class Output, void Appender(unsigned char, Output*)> |
188 inline void DoAppendUTF8(unsigned char_value, Output* output) { | 188 inline void DoAppendUTF8(unsigned char_value, Output* output) { |
189 if (char_value <= 0x7f) { | 189 if (char_value <= 0x7f) { |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
245 // Reads one character in UTF-16 starting at |*begin| in |str| and places | 245 // Reads one character in UTF-16 starting at |*begin| in |str| and places |
246 // the decoded value into |*code_point|. If the character is valid, we will | 246 // the decoded value into |*code_point|. If the character is valid, we will |
247 // return true. If invalid, we'll return false and put the | 247 // return true. If invalid, we'll return false and put the |
248 // kUnicodeReplacementCharacter into |*code_point|. | 248 // kUnicodeReplacementCharacter into |*code_point|. |
249 // | 249 // |
250 // |*begin| will be updated to point to the last character consumed so it | 250 // |*begin| will be updated to point to the last character consumed so it |
251 // can be incremented in a loop and will be ready for the next character. | 251 // can be incremented in a loop and will be ready for the next character. |
252 // (for a single-16-bit-word character, it will not be changed). | 252 // (for a single-16-bit-word character, it will not be changed). |
253 // | 253 // |
254 // Implementation is in url_canon_icu.cc. | 254 // Implementation is in url_canon_icu.cc. |
255 GURL_API bool ReadUTFChar(const char16* str, int* begin, int length, | 255 bool ReadUTFChar(const char16* str, int* begin, int length, |
256 unsigned* code_point); | 256 unsigned* code_point); |
257 | 257 |
258 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. | 258 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. |
259 inline void AppendUTF16Value(unsigned code_point, | 259 inline void AppendUTF16Value(unsigned code_point, |
260 CanonOutputT<char16>* output) { | 260 CanonOutputT<char16>* output) { |
261 if (code_point > 0xffff) { | 261 if (code_point > 0xffff) { |
262 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0)); | 262 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0)); |
263 output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00)); | 263 output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00)); |
264 } else { | 264 } else { |
265 output->push_back(static_cast<char16>(code_point)); | 265 output->push_back(static_cast<char16>(code_point)); |
266 } | 266 } |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
364 // Misc canonicalization helpers ---------------------------------------------- | 364 // Misc canonicalization helpers ---------------------------------------------- |
365 | 365 |
366 // Converts between UTF-8 and UTF-16, returning true on successful conversion. | 366 // Converts between UTF-8 and UTF-16, returning true on successful conversion. |
367 // The output will be appended to the given canonicalizer output (so make sure | 367 // The output will be appended to the given canonicalizer output (so make sure |
368 // it's empty if you want to replace). | 368 // it's empty if you want to replace). |
369 // | 369 // |
370 // On invalid input, this will still write as much output as possible, | 370 // On invalid input, this will still write as much output as possible, |
371 // replacing the invalid characters with the "invalid character". It will | 371 // replacing the invalid characters with the "invalid character". It will |
372 // return false in the failure case, and the caller should not continue as | 372 // return false in the failure case, and the caller should not continue as |
373 // normal. | 373 // normal. |
374 GURL_API bool ConvertUTF16ToUTF8(const char16* input, int input_len, | 374 bool ConvertUTF16ToUTF8(const char16* input, int input_len, |
375 CanonOutput* output); | 375 CanonOutput* output); |
376 GURL_API bool ConvertUTF8ToUTF16(const char* input, int input_len, | 376 bool ConvertUTF8ToUTF16(const char* input, int input_len, |
377 CanonOutputT<char16>* output); | 377 CanonOutputT<char16>* output); |
378 | 378 |
379 // Converts from UTF-16 to 8-bit using the character set converter. If the | 379 // Converts from UTF-16 to 8-bit using the character set converter. If the |
380 // converter is NULL, this will use UTF-8. | 380 // converter is NULL, this will use UTF-8. |
381 void ConvertUTF16ToQueryEncoding(const char16* input, | 381 void ConvertUTF16ToQueryEncoding(const char16* input, |
382 const url_parse::Component& query, | 382 const url_parse::Component& query, |
383 CharsetConverter* converter, | 383 CharsetConverter* converter, |
384 CanonOutput* output); | 384 CanonOutput* output); |
385 | 385 |
386 // Applies the replacements to the given component source. The component source | 386 // Applies the replacements to the given component source. The component source |
387 // should be pre-initialized to the "old" base. That is, all pointers will | 387 // should be pre-initialized to the "old" base. That is, all pointers will |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
426 int path_begin_in_output, | 426 int path_begin_in_output, |
427 CanonOutput* output); | 427 CanonOutput* output); |
428 bool CanonicalizePartialPath(const char16* spec, | 428 bool CanonicalizePartialPath(const char16* spec, |
429 const url_parse::Component& path, | 429 const url_parse::Component& path, |
430 int path_begin_in_output, | 430 int path_begin_in_output, |
431 CanonOutput* output); | 431 CanonOutput* output); |
432 | 432 |
433 #ifndef WIN32 | 433 #ifndef WIN32 |
434 | 434 |
435 // Implementations of Windows' int-to-string conversions | 435 // Implementations of Windows' int-to-string conversions |
436 GURL_API int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix); | 436 int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix); |
437 GURL_API int _itow_s(int value, char16* buffer, size_t size_in_chars, | 437 int _itow_s(int value, char16* buffer, size_t size_in_chars, |
438 int radix); | 438 int radix); |
439 | 439 |
440 // Secure template overloads for these functions | 440 // Secure template overloads for these functions |
441 template<size_t N> | 441 template<size_t N> |
442 inline int _itoa_s(int value, char (&buffer)[N], int radix) { | 442 inline int _itoa_s(int value, char (&buffer)[N], int radix) { |
443 return _itoa_s(value, buffer, N, radix); | 443 return _itoa_s(value, buffer, N, radix); |
444 } | 444 } |
445 | 445 |
446 template<size_t N> | 446 template<size_t N> |
447 inline int _itow_s(int value, char16 (&buffer)[N], int radix) { | 447 inline int _itow_s(int value, char16 (&buffer)[N], int radix) { |
448 return _itow_s(value, buffer, N, radix); | 448 return _itow_s(value, buffer, N, radix); |
449 } | 449 } |
450 | 450 |
451 // _strtoui64 and strtoull behave the same | 451 // _strtoui64 and strtoull behave the same |
452 inline unsigned long long _strtoui64(const char* nptr, | 452 inline unsigned long long _strtoui64(const char* nptr, |
453 char** endptr, int base) { | 453 char** endptr, int base) { |
454 return strtoull(nptr, endptr, base); | 454 return strtoull(nptr, endptr, base); |
455 } | 455 } |
456 | 456 |
457 #endif // WIN32 | 457 #endif // WIN32 |
458 | 458 |
459 } // namespace url_canon | 459 } // namespace url_canon |
460 | 460 |
461 #endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_H__ | 461 #endif // URL_URL_CANON_INTERNAL_H_ |
OLD | NEW |