src/url_canon_internal.h - Issue 118062: Moving ICU dependent function ReadUTFChar into the icu.cc file....

Side by Side Diff: src/url_canon_internal.h

Issue 118062: Moving ICU dependent function ReadUTFChar into the icu.cc file.... (Closed) Base URL: http://google-url.googlecode.com/svn/trunk/

Patch Set: '' Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2007, Google Inc.	1 // Copyright 2007, Google Inc.

2 // All rights reserved.	2 // All rights reserved.

3 //	3 //

4 // Redistribution and use in source and binary forms, with or without	4 // Redistribution and use in source and binary forms, with or without

5 // modification, are permitted provided that the following conditions are	5 // modification, are permitted provided that the following conditions are

6 // met:	6 // met:

7 //	7 //

8 // * Redistributions of source code must retain the above copyright	8 // * Redistributions of source code must retain the above copyright

9 // notice, this list of conditions and the following disclaimer.	9 // notice, this list of conditions and the following disclaimer.

10 // * Redistributions in binary form must reproduce the above	10 // * Redistributions in binary form must reproduce the above

(...skipping 145 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
156 // UTF-8 functions ------------------------------------------------------------	156 // UTF-8 functions ------------------------------------------------------------

157	157

158 // Reads one character in UTF-8 starting at \|*begin\| in \|str\| and places	158 // Reads one character in UTF-8 starting at \|*begin\| in \|str\| and places

159 // the decoded value into \|*code_point\|. If the character is valid, we will	159 // the decoded value into \|*code_point\|. If the character is valid, we will

160 // return true. If invalid, we'll return false and put the	160 // return true. If invalid, we'll return false and put the

161 // kUnicodeReplacementCharacter into \|*code_point\|.	161 // kUnicodeReplacementCharacter into \|*code_point\|.

162 //	162 //

163 // \|*begin\| will be updated to point to the last character consumed so it	163 // \|*begin\| will be updated to point to the last character consumed so it

164 // can be incremented in a loop and will be ready for the next character.	164 // can be incremented in a loop and will be ready for the next character.

165 // (for a single-byte ASCII character, it will not be changed).	165 // (for a single-byte ASCII character, it will not be changed).

166 inline bool ReadUTFChar(const char* str, int* begin, int length,	166 //

167 unsigned* code_point_out) {	167 // Implementation is in url_canon_icu.cc.

168 int code_point; // Avoids warning when U8_NEXT writes -1 to it.	168 bool ReadUTFChar(const char* str, int* begin, int length,

169 U8_NEXT(str, *begin, length, code_point);	169 unsigned* code_point_out);

170 *code_point_out = static_cast<unsigned>(code_point);

171

172 // The ICU macro above moves to the next char, we want to point to the last

173 // char consumed.

174 (*begin)--;

175

176 // Validate the decoded value.

177 if (U_IS_UNICODE_CHAR(code_point))

178 return true;

179 *code_point_out = kUnicodeReplacementCharacter;

180 return false;

181 }

182	170

183 // Generic To-UTF-8 converter. This will call the given append method for each	171 // Generic To-UTF-8 converter. This will call the given append method for each

184 // character that should be appended, with the given output method. Wrappers	172 // character that should be appended, with the given output method. Wrappers

185 // are provided below for escaped and non-escaped versions of this.	173 // are provided below for escaped and non-escaped versions of this.

186 template<class Output, void Appender(unsigned char, Output*)>	174 template<class Output, void Appender(unsigned char, Output*)>

187 inline void DoAppendUTF8(unsigned char_value, Output* output) {	175 inline void DoAppendUTF8(unsigned char_value, Output* output) {

188 if (char_value <= 0x7f) {	176 if (char_value <= 0x7f) {

189 Appender(static_cast<unsigned char>(char_value), output);	177 Appender(static_cast<unsigned char>(char_value), output);

190 } else if (char_value <= 0x7ff) {	178 } else if (char_value <= 0x7ff) {

191 // 110xxxxx 10xxxxxx	179 // 110xxxxx 10xxxxxx

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
253 // UTF-16 functions -----------------------------------------------------------	241 // UTF-16 functions -----------------------------------------------------------

254	242

255 // Reads one character in UTF-16 starting at \|*begin\| in \|str\| and places	243 // Reads one character in UTF-16 starting at \|*begin\| in \|str\| and places

256 // the decoded value into \|*code_point\|. If the character is valid, we will	244 // the decoded value into \|*code_point\|. If the character is valid, we will

257 // return true. If invalid, we'll return false and put the	245 // return true. If invalid, we'll return false and put the

258 // kUnicodeReplacementCharacter into \|*code_point\|.	246 // kUnicodeReplacementCharacter into \|*code_point\|.

259 //	247 //

260 // \|*begin\| will be updated to point to the last character consumed so it	248 // \|*begin\| will be updated to point to the last character consumed so it

261 // can be incremented in a loop and will be ready for the next character.	249 // can be incremented in a loop and will be ready for the next character.

262 // (for a single-16-bit-word character, it will not be changed).	250 // (for a single-16-bit-word character, it will not be changed).

263 inline bool ReadUTFChar(const char16* str, int* begin, int length,	251 //

264 unsigned* code_point) {	252 // Implementation is in url_canon_icu.cc.

265 if (U16_IS_SURROGATE(str[*begin])) {	253 bool ReadUTFChar(const char16* str, int* begin, int length,

266 if (!U16_IS_SURROGATE_LEAD(str[*begin]) \|\| *begin + 1 >= length \|\|	254 unsigned* code_point);

267 !U16_IS_TRAIL(str[*begin + 1])) {

268 // Invalid surrogate pair.

269 *code_point = kUnicodeReplacementCharacter;

270 return false;

271 } else {

272 // Valid surrogate pair.

273 *code_point = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]);

274 (*begin)++;

275 }

276 } else {

277 // Not a surrogate, just one 16-bit word.

278 *code_point = str[*begin];

279 }

280

281 if (U_IS_UNICODE_CHAR(*code_point))

282 return true;

283

284 // Invalid code point.

285 *code_point = kUnicodeReplacementCharacter;

286 return false;

287 }

288	255

289 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.	256 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.

290 inline void AppendUTF16Value(unsigned code_point,	257 inline void AppendUTF16Value(unsigned code_point,

291 CanonOutputT<char16>* output) {	258 CanonOutputT<char16>* output) {

292 if (code_point > 0xffff) {	259 if (code_point > 0xffff) {

293 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0));	260 output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0));

294 output->push_back(static_cast<char16>((code_point & 0x3ff) \| 0xdc00));	261 output->push_back(static_cast<char16>((code_point & 0x3ff) \| 0xdc00));

295 } else {	262 } else {

296 output->push_back(static_cast<char16>(code_point));	263 output->push_back(static_cast<char16>(code_point));

297 }	264 }

(...skipping 184 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
482 inline unsigned long long _strtoui64(const char* nptr,	449 inline unsigned long long _strtoui64(const char* nptr,

483 char** endptr, int base) {	450 char** endptr, int base) {

484 return strtoull(nptr, endptr, base);	451 return strtoull(nptr, endptr, base);

485 }	452 }

486	453

487 #endif // WIN32	454 #endif // WIN32

488	455

489 } // namespace url_canon	456 } // namespace url_canon

490	457

491 #endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_H__	458 #endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_H__

OLD	NEW

« no previous file with comments | « src/url_canon_icu.cc ('k') | no next file » | no next file with comments »