url/url_canon_internal.h - Issue 1270443006: Proof-read comments in src/url/.

Side by Side Diff: url/url_canon_internal.h

Issue 1270443006: Proof-read comments in src/url/. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« url/gurl.h ('K') | « url/url_canon_host.cc ('k') | url/url_canon_internal.cc » ('j') | no next file with comments »

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef URL_URL_CANON_INTERNAL_H_	5 #ifndef URL_URL_CANON_INTERNAL_H_

6 #define URL_URL_CANON_INTERNAL_H_	6 #define URL_URL_CANON_INTERNAL_H_

7	7

8 // This file is intended to be included in another C++ file where the character	8 // This file is intended to be included in another C++ file where the character

9 // types are defined. This allows us to write mostly generic code, but not have	9 // types are defined. This allows us to write mostly generic code, but not have

10 // templace bloat because everything is inlined when anybody calls any of our	10 // template bloat because everything is inlined when anybody calls any of our

11 // functions.	11 // functions.

12	12

13 #include <stdlib.h>	13 #include <stdlib.h>

14	14

15 #include "base/logging.h"	15 #include "base/logging.h"

16 #include "url/url_canon.h"	16 #include "url/url_canon.h"

17	17

18 namespace url {	18 namespace url {

19	19

20 // Character type handling -----------------------------------------------------	20 // Character type handling -----------------------------------------------------

(...skipping 13 matching lines...)
34	34

35 // Valid in an ASCII-representation of a hex digit (as in %-escaped).	35 // Valid in an ASCII-representation of a hex digit (as in %-escaped).

36 CHAR_HEX = 8,	36 CHAR_HEX = 8,

37	37

38 // Valid in an ASCII-representation of a decimal digit.	38 // Valid in an ASCII-representation of a decimal digit.

39 CHAR_DEC = 16,	39 CHAR_DEC = 16,

40	40

41 // Valid in an ASCII-representation of an octal digit.	41 // Valid in an ASCII-representation of an octal digit.

42 CHAR_OCT = 32,	42 CHAR_OCT = 32,

43	43

44 // Characters that do not require escaping in encodeURIComponent. Characters	44 // Characters that do not require escaping in encodeURIComponent. Characters

45 // that do not have this flag will be escaped; see url_util.cc.	45 // that do not have this flag will be escaped; see url_util.cc.

46 CHAR_COMPONENT = 64,	46 CHAR_COMPONENT = 64,

47 };	47 };

48	48

49 // This table contains the flags in SharedCharTypes for each 8-bit character.	49 // This table contains the flags in SharedCharTypes for each 8-bit character.

50 // Some canonicalization functions have their own specialized lookup table.	50 // Some canonicalization functions have their own specialized lookup table.

51 // For those with simple requirements, we have collected the flags in one	51 // For those with simple requirements, we have collected the flags in one

52 // place so there are fewer lookup tables to load into the CPU cache.	52 // place so there are fewer lookup tables to load into the CPU cache.

53 //	53 //

54 // Using an unsigned char type has a small but measurable performance benefit	54 // Using an unsigned char type has a small but measurable performance benefit

(...skipping 113 matching lines...)
168 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),	168 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),

169 output);	169 output);

170 } else if (char_value <= 0xffff) {	170 } else if (char_value <= 0xffff) {

171 // 1110xxxx 10xxxxxx 10xxxxxx	171 // 1110xxxx 10xxxxxx 10xxxxxx

172 Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)),	172 Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)),

173 output);	173 output);

174 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),	174 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),

175 output);	175 output);

176 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),	176 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),

177 output);	177 output);

178 } else if (char_value <= 0x10FFFF) { // Max unicode code point.	178 } else if (char_value <= 0x10FFFF) { // Max Unicode code point.

179 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx	179 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

180 Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),	180 Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),

181 output);	181 output);

182 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)),	182 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)),

183 output);	183 output);

184 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),	184 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),

185 output);	185 output);

186 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),	186 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),

187 output);	187 output);

188 } else {	188 } else {

189 // Invalid UTF-8 character (>20 bits).	189 // Invalid UTF-8 character (>20 bits).

190 NOTREACHED();	190 NOTREACHED();

191 }	191 }

192 }	192 }

193	193

194 // Helper used by AppendUTF8Value below. We use an unsigned parameter so there	194 // Helper used by AppendUTF8Value below. We use an unsigned parameter so there

195 // are no funny sign problems with the input, but then have to convert it to	195 // are no funny sign problems with the input, but then have to convert it to

196 // a regular char for appending.	196 // a regular char for appending.

197 inline void AppendCharToOutput(unsigned char ch, CanonOutput* output) {	197 inline void AppendCharToOutput(unsigned char ch, CanonOutput* output) {

198 output->push_back(static_cast<char>(ch));	198 output->push_back(static_cast<char>(ch));

199 }	199 }

200	200

201 // Writes the given character to the output as UTF-8. This does NO checking	201 // Writes the given character to the output as UTF-8. This does NO checking

202 // of the validity of the unicode characters; the caller should ensure that	202 // of the validity of the Unicode characters; the caller should ensure that

203 // the value it is appending is valid to append.	203 // the value it is appending is valid to append.

204 inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {	204 inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {

205 DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output);	205 DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output);

206 }	206 }

207	207

208 // Writes the given character to the output as UTF-8, escaping ALL	208 // Writes the given character to the output as UTF-8, escaping ALL

209 // characters (even when they are ASCII). This does NO checking of the	209 // characters (even when they are ASCII). This does NO checking of the

210 // validity of the unicode characters; the caller should ensure that the value	210 // validity of the Unicode characters; the caller should ensure that the value

211 // it is appending is valid to append.	211 // it is appending is valid to append.

212 inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {	212 inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {

213 DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output);	213 DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output);

214 }	214 }

215	215

216 // UTF-16 functions -----------------------------------------------------------	216 // UTF-16 functions -----------------------------------------------------------

217	217

218 // Reads one character in UTF-16 starting at |*begin| in |str| and places	218 // Reads one character in UTF-16 starting at |*begin| in |str| and places

219 // the decoded value into |*code_point|. If the character is valid, we will	219 // the decoded value into |*code_point|. If the character is valid, we will

220 // return true. If invalid, we'll return false and put the	220 // return true. If invalid, we'll return false and put the

(...skipping 32 matching lines...)
253 // Every single output character will be escaped. This means that if you	253 // Every single output character will be escaped. This means that if you

254 // give it an ASCII character as input, it will be escaped. Some code uses	254 // give it an ASCII character as input, it will be escaped. Some code uses

255 // this when it knows that a character is invalid according to its rules	255 // this when it knows that a character is invalid according to its rules

256 // for validity. If you don't want escaping for ASCII characters, you will	256 // for validity. If you don't want escaping for ASCII characters, you will

257 // have to filter them out prior to calling this function.	257 // have to filter them out prior to calling this function.

258 //	258 //

259 // Assumes that ch[begin] is within range in the array, but does not assume	259 // Assumes that ch[begin] is within range in the array, but does not assume

260 // that any following characters are.	260 // that any following characters are.

261 inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin,	261 inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin,

262 int length, CanonOutput* output) {	262 int length, CanonOutput* output) {

263 // UTF-16 input. Readchar16 will handle invalid characters for us and give	263 // UTF-16 input. ReadUTFChar will handle invalid characters for us and give

264 // us the kUnicodeReplacementCharacter, so we don't have to do special	264 // us the kUnicodeReplacementCharacter, so we don't have to do special

265 // checking after failure, just pass through the failure to the caller.	265 // checking after failure, just pass through the failure to the caller.

266 unsigned char_value;	266 unsigned char_value;

267 bool success = ReadUTFChar(str, begin, length, &char_value);	267 bool success = ReadUTFChar(str, begin, length, &char_value);

268 AppendUTF8EscapedValue(char_value, output);	268 AppendUTF8EscapedValue(char_value, output);

269 return success;	269 return success;

270 }	270 }

271	271

272 // Handles UTF-8 input. See the wide version above for usage.	272 // Handles UTF-8 input. See the wide version above for usage.

273 inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length,	273 inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length,

(...skipping 150 matching lines...)
424 inline unsigned long long _strtoui64(const char* nptr,	424 inline unsigned long long _strtoui64(const char* nptr,

425 char** endptr, int base) {	425 char** endptr, int base) {

426 return strtoull(nptr, endptr, base);	426 return strtoull(nptr, endptr, base);

427 }	427 }

428	428

429 #endif // WIN32	429 #endif // WIN32

430	430

431 } // namespace url	431 } // namespace url

432	432

433 #endif // URL_URL_CANON_INTERNAL_H_	433 #endif // URL_URL_CANON_INTERNAL_H_

OLD	NEW