OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef URL_URL_CANON_INTERNAL_H_ | 5 #ifndef URL_URL_CANON_INTERNAL_H_ |
6 #define URL_URL_CANON_INTERNAL_H_ | 6 #define URL_URL_CANON_INTERNAL_H_ |
7 | 7 |
8 // This file is intended to be included in another C++ file where the character | 8 // This file is intended to be included in another C++ file where the character |
9 // types are defined. This allows us to write mostly generic code, but not have | 9 // types are defined. This allows us to write mostly generic code, but not have |
10 // template bloat because everything is inlined when anybody calls any of our | 10 // template bloat because everything is inlined when anybody calls any of our |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
76 // match the given |type| in SharedCharTypes. | 76 // match the given |type| in SharedCharTypes. |
77 void AppendStringOfType(const char* source, int length, | 77 void AppendStringOfType(const char* source, int length, |
78 SharedCharTypes type, | 78 SharedCharTypes type, |
79 CanonOutput* output); | 79 CanonOutput* output); |
80 void AppendStringOfType(const char16* source, int length, | 80 void AppendStringOfType(const char16* source, int length, |
81 SharedCharTypes type, | 81 SharedCharTypes type, |
82 CanonOutput* output); | 82 CanonOutput* output); |
83 | 83 |
84 // Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit | 84 // Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit |
85 // that will be used to represent it. | 85 // that will be used to represent it. |
86 URL_EXPORT extern const char kHexCharLookup[0x10]; | 86 extern const char kHexCharLookup[0x10]; |
87 | 87 |
88 // This lookup table allows fast conversion between ASCII hex letters and their | 88 // This lookup table allows fast conversion between ASCII hex letters and their |
89 // corresponding numerical value. The 8-bit range is divided up into 8 | 89 // corresponding numerical value. The 8-bit range is divided up into 8 |
90 // regions of 0x20 characters each. Each of the three character types (numbers, | 90 // regions of 0x20 characters each. Each of the three character types (numbers, |
91 // uppercase, lowercase) falls into different regions of this range. The table | 91 // uppercase, lowercase) falls into different regions of this range. The table |
92 // contains the amount to subtract from characters in that range to get at | 92 // contains the amount to subtract from characters in that range to get at |
93 // the corresponding numerical value. | 93 // the corresponding numerical value. |
94 // | 94 // |
95 // See HexDigitToValue for the lookup. | 95 // See HexDigitToValue for the lookup. |
96 extern const char kCharToHexLookup[8]; | 96 extern const char kCharToHexLookup[8]; |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
143 // Reads one character in UTF-8 starting at |*begin| in |str| and places | 143 // Reads one character in UTF-8 starting at |*begin| in |str| and places |
144 // the decoded value into |*code_point_out|. If the character is valid, we will | 144 // the decoded value into |*code_point_out|. If the character is valid, we will |
145 // return true. If invalid, we'll return false and put the | 145 // return true. If invalid, we'll return false and put the |
146 // kUnicodeReplacementCharacter into |*code_point_out|. | 146 // kUnicodeReplacementCharacter into |*code_point_out|. |
147 // | 147 // |
148 // |*begin| will be updated to point to the last character consumed so it | 148 // |*begin| will be updated to point to the last character consumed so it |
149 // can be incremented in a loop and will be ready for the next character. | 149 // can be incremented in a loop and will be ready for the next character. |
150 // (for a single-byte ASCII character, it will not be changed). | 150 // (for a single-byte ASCII character, it will not be changed). |
151 // | 151 // |
152 // Implementation is in url_canon_icu.cc. | 152 // Implementation is in url_canon_icu.cc. |
153 URL_EXPORT bool ReadUTFChar(const char* str, int* begin, int length, | 153 bool ReadUTFChar(const char* str, int* begin, int length, |
154 unsigned* code_point_out); | 154 unsigned* code_point_out); |
155 | 155 |
156 // Generic To-UTF-8 converter. This will call the given append method for each | 156 // Generic To-UTF-8 converter. This will call the given append method for each |
157 // character that should be appended, with the given output method. Wrappers | 157 // character that should be appended, with the given output method. Wrappers |
158 // are provided below for escaped and non-escaped versions of this. | 158 // are provided below for escaped and non-escaped versions of this. |
159 // | 159 // |
160 // The char_value must have already been checked that it's a valid Unicode | 160 // The char_value must have already been checked that it's a valid Unicode |
161 // character. | 161 // character. |
162 template<class Output, void Appender(unsigned char, Output*)> | 162 template<class Output, void Appender(unsigned char, Output*)> |
163 inline void DoAppendUTF8(unsigned char_value, Output* output) { | 163 inline void DoAppendUTF8(unsigned char_value, Output* output) { |
164 if (char_value <= 0x7f) { | 164 if (char_value <= 0x7f) { |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
220 // Reads one character in UTF-16 starting at |*begin| in |str| and places | 220 // Reads one character in UTF-16 starting at |*begin| in |str| and places |
221 // the decoded value into |*code_point|. If the character is valid, we will | 221 // the decoded value into |*code_point|. If the character is valid, we will |
222 // return true. If invalid, we'll return false and put the | 222 // return true. If invalid, we'll return false and put the |
223 // kUnicodeReplacementCharacter into |*code_point|. | 223 // kUnicodeReplacementCharacter into |*code_point|. |
224 // | 224 // |
225 // |*begin| will be updated to point to the last character consumed so it | 225 // |*begin| will be updated to point to the last character consumed so it |
226 // can be incremented in a loop and will be ready for the next character. | 226 // can be incremented in a loop and will be ready for the next character. |
227 // (for a single-16-bit-word character, it will not be changed). | 227 // (for a single-16-bit-word character, it will not be changed). |
228 // | 228 // |
229 // Implementation is in url_canon_icu.cc. | 229 // Implementation is in url_canon_icu.cc. |
230 URL_EXPORT bool ReadUTFChar(const char16* str, int* begin, int length, | 230 bool ReadUTFChar(const char16* str, int* begin, int length, |
231 unsigned* code_point); | 231 unsigned* code_point); |
232 | 232 |
233 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. | 233 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. |
234 inline void AppendUTF16Value(unsigned code_point, | 234 inline void AppendUTF16Value(unsigned code_point, |
235 CanonOutputT<char16>* output) { | 235 CanonOutputT<char16>* output) { |
236 if (code_point > 0xffff) { | 236 if (code_point > 0xffff) { |
237 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0)); | 237 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0)); |
238 output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00)); | 238 output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00)); |
239 } else { | 239 } else { |
240 output->push_back(static_cast<char16>(code_point)); | 240 output->push_back(static_cast<char16>(code_point)); |
241 } | 241 } |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
339 // Misc canonicalization helpers ---------------------------------------------- | 339 // Misc canonicalization helpers ---------------------------------------------- |
340 | 340 |
341 // Converts between UTF-8 and UTF-16, returning true on successful conversion. | 341 // Converts between UTF-8 and UTF-16, returning true on successful conversion. |
342 // The output will be appended to the given canonicalizer output (so make sure | 342 // The output will be appended to the given canonicalizer output (so make sure |
343 // it's empty if you want to replace). | 343 // it's empty if you want to replace). |
344 // | 344 // |
345 // On invalid input, this will still write as much output as possible, | 345 // On invalid input, this will still write as much output as possible, |
346 // replacing the invalid characters with the "invalid character". It will | 346 // replacing the invalid characters with the "invalid character". It will |
347 // return false in the failure case, and the caller should not continue as | 347 // return false in the failure case, and the caller should not continue as |
348 // normal. | 348 // normal. |
349 URL_EXPORT bool ConvertUTF16ToUTF8(const char16* input, int input_len, | 349 bool ConvertUTF16ToUTF8(const char16* input, int input_len, |
350 CanonOutput* output); | 350 CanonOutput* output); |
351 URL_EXPORT bool ConvertUTF8ToUTF16(const char* input, int input_len, | 351 bool ConvertUTF8ToUTF16(const char* input, int input_len, |
352 CanonOutputT<char16>* output); | 352 CanonOutputT<char16>* output); |
353 | 353 |
354 // Converts from UTF-16 to 8-bit using the character set converter. If the | 354 // Converts from UTF-16 to 8-bit using the character set converter. If the |
355 // converter is NULL, this will use UTF-8. | 355 // converter is NULL, this will use UTF-8. |
356 void ConvertUTF16ToQueryEncoding(const char16* input, | 356 void ConvertUTF16ToQueryEncoding(const char16* input, |
357 const url_parse::Component& query, | 357 const url_parse::Component& query, |
358 CharsetConverter* converter, | 358 CharsetConverter* converter, |
359 CanonOutput* output); | 359 CanonOutput* output); |
360 | 360 |
361 // Applies the replacements to the given component source. The component source | 361 // Applies the replacements to the given component source. The component source |
362 // should be pre-initialized to the "old" base. That is, all pointers will | 362 // should be pre-initialized to the "old" base. That is, all pointers will |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
401 int path_begin_in_output, | 401 int path_begin_in_output, |
402 CanonOutput* output); | 402 CanonOutput* output); |
403 bool CanonicalizePartialPath(const char16* spec, | 403 bool CanonicalizePartialPath(const char16* spec, |
404 const url_parse::Component& path, | 404 const url_parse::Component& path, |
405 int path_begin_in_output, | 405 int path_begin_in_output, |
406 CanonOutput* output); | 406 CanonOutput* output); |
407 | 407 |
408 #ifndef WIN32 | 408 #ifndef WIN32 |
409 | 409 |
410 // Implementations of Windows' int-to-string conversions | 410 // Implementations of Windows' int-to-string conversions |
411 URL_EXPORT int _itoa_s(int value, char* buffer, size_t size_in_chars, | 411 int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix); |
412 int radix); | 412 int _itow_s(int value, char16* buffer, size_t size_in_chars, |
413 URL_EXPORT int _itow_s(int value, char16* buffer, size_t size_in_chars, | 413 int radix); |
414 int radix); | |
415 | 414 |
416 // Secure template overloads for these functions | 415 // Secure template overloads for these functions |
417 template<size_t N> | 416 template<size_t N> |
418 inline int _itoa_s(int value, char (&buffer)[N], int radix) { | 417 inline int _itoa_s(int value, char (&buffer)[N], int radix) { |
419 return _itoa_s(value, buffer, N, radix); | 418 return _itoa_s(value, buffer, N, radix); |
420 } | 419 } |
421 | 420 |
422 template<size_t N> | 421 template<size_t N> |
423 inline int _itow_s(int value, char16 (&buffer)[N], int radix) { | 422 inline int _itow_s(int value, char16 (&buffer)[N], int radix) { |
424 return _itow_s(value, buffer, N, radix); | 423 return _itow_s(value, buffer, N, radix); |
425 } | 424 } |
426 | 425 |
427 // _strtoui64 and strtoull behave the same | 426 // _strtoui64 and strtoull behave the same |
428 inline unsigned long long _strtoui64(const char* nptr, | 427 inline unsigned long long _strtoui64(const char* nptr, |
429 char** endptr, int base) { | 428 char** endptr, int base) { |
430 return strtoull(nptr, endptr, base); | 429 return strtoull(nptr, endptr, base); |
431 } | 430 } |
432 | 431 |
433 #endif // WIN32 | 432 #endif // WIN32 |
434 | 433 |
435 } // namespace url_canon | 434 } // namespace url_canon |
436 | 435 |
437 #endif // URL_URL_CANON_INTERNAL_H_ | 436 #endif // URL_URL_CANON_INTERNAL_H_ |
OLD | NEW |