Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(464)

Side by Side Diff: src/url_canon_internal.h

Issue 118062: Moving ICU dependent function ReadUTFChar into the icu.cc file.... (Closed) Base URL: http://google-url.googlecode.com/svn/trunk/
Patch Set: '' Created 11 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/url_canon_icu.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2007, Google Inc. 1 // Copyright 2007, Google Inc.
2 // All rights reserved. 2 // All rights reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are 5 // modification, are permitted provided that the following conditions are
6 // met: 6 // met:
7 // 7 //
8 // * Redistributions of source code must retain the above copyright 8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer. 9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above 10 // * Redistributions in binary form must reproduce the above
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after
156 // UTF-8 functions ------------------------------------------------------------ 156 // UTF-8 functions ------------------------------------------------------------
157 157
158 // Reads one character in UTF-8 starting at |*begin| in |str| and places 158 // Reads one character in UTF-8 starting at |*begin| in |str| and places
159 // the decoded value into |*code_point|. If the character is valid, we will 159 // the decoded value into |*code_point|. If the character is valid, we will
160 // return true. If invalid, we'll return false and put the 160 // return true. If invalid, we'll return false and put the
161 // kUnicodeReplacementCharacter into |*code_point|. 161 // kUnicodeReplacementCharacter into |*code_point|.
162 // 162 //
163 // |*begin| will be updated to point to the last character consumed so it 163 // |*begin| will be updated to point to the last character consumed so it
164 // can be incremented in a loop and will be ready for the next character. 164 // can be incremented in a loop and will be ready for the next character.
165 // (for a single-byte ASCII character, it will not be changed). 165 // (for a single-byte ASCII character, it will not be changed).
166 inline bool ReadUTFChar(const char* str, int* begin, int length, 166 //
167 unsigned* code_point_out) { 167 // Implementation is in url_canon_icu.cc.
168 int code_point; // Avoids warning when U8_NEXT writes -1 to it. 168 bool ReadUTFChar(const char* str, int* begin, int length,
169 U8_NEXT(str, *begin, length, code_point); 169 unsigned* code_point_out);
170 *code_point_out = static_cast<unsigned>(code_point);
171
172 // The ICU macro above moves to the next char, we want to point to the last
173 // char consumed.
174 (*begin)--;
175
176 // Validate the decoded value.
177 if (U_IS_UNICODE_CHAR(code_point))
178 return true;
179 *code_point_out = kUnicodeReplacementCharacter;
180 return false;
181 }
182 170
183 // Generic To-UTF-8 converter. This will call the given append method for each 171 // Generic To-UTF-8 converter. This will call the given append method for each
184 // character that should be appended, with the given output method. Wrappers 172 // character that should be appended, with the given output method. Wrappers
185 // are provided below for escaped and non-escaped versions of this. 173 // are provided below for escaped and non-escaped versions of this.
186 template<class Output, void Appender(unsigned char, Output*)> 174 template<class Output, void Appender(unsigned char, Output*)>
187 inline void DoAppendUTF8(unsigned char_value, Output* output) { 175 inline void DoAppendUTF8(unsigned char_value, Output* output) {
188 if (char_value <= 0x7f) { 176 if (char_value <= 0x7f) {
189 Appender(static_cast<unsigned char>(char_value), output); 177 Appender(static_cast<unsigned char>(char_value), output);
190 } else if (char_value <= 0x7ff) { 178 } else if (char_value <= 0x7ff) {
191 // 110xxxxx 10xxxxxx 179 // 110xxxxx 10xxxxxx
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
253 // UTF-16 functions ----------------------------------------------------------- 241 // UTF-16 functions -----------------------------------------------------------
254 242
255 // Reads one character in UTF-16 starting at |*begin| in |str| and places 243 // Reads one character in UTF-16 starting at |*begin| in |str| and places
256 // the decoded value into |*code_point|. If the character is valid, we will 244 // the decoded value into |*code_point|. If the character is valid, we will
257 // return true. If invalid, we'll return false and put the 245 // return true. If invalid, we'll return false and put the
258 // kUnicodeReplacementCharacter into |*code_point|. 246 // kUnicodeReplacementCharacter into |*code_point|.
259 // 247 //
260 // |*begin| will be updated to point to the last character consumed so it 248 // |*begin| will be updated to point to the last character consumed so it
261 // can be incremented in a loop and will be ready for the next character. 249 // can be incremented in a loop and will be ready for the next character.
262 // (for a single-16-bit-word character, it will not be changed). 250 // (for a single-16-bit-word character, it will not be changed).
263 inline bool ReadUTFChar(const char16* str, int* begin, int length, 251 //
264 unsigned* code_point) { 252 // Implementation is in url_canon_icu.cc.
265 if (U16_IS_SURROGATE(str[*begin])) { 253 bool ReadUTFChar(const char16* str, int* begin, int length,
266 if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length || 254 unsigned* code_point);
267 !U16_IS_TRAIL(str[*begin + 1])) {
268 // Invalid surrogate pair.
269 *code_point = kUnicodeReplacementCharacter;
270 return false;
271 } else {
272 // Valid surrogate pair.
273 *code_point = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]);
274 (*begin)++;
275 }
276 } else {
277 // Not a surrogate, just one 16-bit word.
278 *code_point = str[*begin];
279 }
280
281 if (U_IS_UNICODE_CHAR(*code_point))
282 return true;
283
284 // Invalid code point.
285 *code_point = kUnicodeReplacementCharacter;
286 return false;
287 }
288 255
289 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. 256 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
290 inline void AppendUTF16Value(unsigned code_point, 257 inline void AppendUTF16Value(unsigned code_point,
291 CanonOutputT<char16>* output) { 258 CanonOutputT<char16>* output) {
292 if (code_point > 0xffff) { 259 if (code_point > 0xffff) {
293 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0)); 260 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0));
294 output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00)); 261 output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00));
295 } else { 262 } else {
296 output->push_back(static_cast<char16>(code_point)); 263 output->push_back(static_cast<char16>(code_point));
297 } 264 }
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
482 inline unsigned long long _strtoui64(const char* nptr, 449 inline unsigned long long _strtoui64(const char* nptr,
483 char** endptr, int base) { 450 char** endptr, int base) {
484 return strtoull(nptr, endptr, base); 451 return strtoull(nptr, endptr, base);
485 } 452 }
486 453
487 #endif // WIN32 454 #endif // WIN32
488 455
489 } // namespace url_canon 456 } // namespace url_canon
490 457
491 #endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_H__ 458 #endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_H__
OLDNEW
« no previous file with comments | « src/url_canon_icu.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698