Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(265)

Side by Side Diff: url/url_canon_internal.h

Issue 1270443006: Proof-read comments in src/url/. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef URL_URL_CANON_INTERNAL_H_ 5 #ifndef URL_URL_CANON_INTERNAL_H_
6 #define URL_URL_CANON_INTERNAL_H_ 6 #define URL_URL_CANON_INTERNAL_H_
7 7
8 // This file is intended to be included in another C++ file where the character 8 // This file is intended to be included in another C++ file where the character
9 // types are defined. This allows us to write mostly generic code, but not have 9 // types are defined. This allows us to write mostly generic code, but not have
10 // templace bloat because everything is inlined when anybody calls any of our 10 // template bloat because everything is inlined when anybody calls any of our
11 // functions. 11 // functions.
12 12
13 #include <stdlib.h> 13 #include <stdlib.h>
14 14
15 #include "base/logging.h" 15 #include "base/logging.h"
16 #include "url/url_canon.h" 16 #include "url/url_canon.h"
17 17
18 namespace url { 18 namespace url {
19 19
20 // Character type handling ----------------------------------------------------- 20 // Character type handling -----------------------------------------------------
(...skipping 13 matching lines...) Expand all
34 34
35 // Valid in an ASCII-representation of a hex digit (as in %-escaped). 35 // Valid in an ASCII-representation of a hex digit (as in %-escaped).
36 CHAR_HEX = 8, 36 CHAR_HEX = 8,
37 37
38 // Valid in an ASCII-representation of a decimal digit. 38 // Valid in an ASCII-representation of a decimal digit.
39 CHAR_DEC = 16, 39 CHAR_DEC = 16,
40 40
41 // Valid in an ASCII-representation of an octal digit. 41 // Valid in an ASCII-representation of an octal digit.
42 CHAR_OCT = 32, 42 CHAR_OCT = 32,
43 43
44 // Characters that do not require escaping in encodeURIComponent. Characters 44 // Characters that do not require escaping in encodeURIComponent. Characters
45 // that do not have this flag will be escaped; see url_util.cc. 45 // that do not have this flag will be escaped; see url_util.cc.
46 CHAR_COMPONENT = 64, 46 CHAR_COMPONENT = 64,
47 }; 47 };
48 48
49 // This table contains the flags in SharedCharTypes for each 8-bit character. 49 // This table contains the flags in SharedCharTypes for each 8-bit character.
50 // Some canonicalization functions have their own specialized lookup table. 50 // Some canonicalization functions have their own specialized lookup table.
51 // For those with simple requirements, we have collected the flags in one 51 // For those with simple requirements, we have collected the flags in one
52 // place so there are fewer lookup tables to load into the CPU cache. 52 // place so there are fewer lookup tables to load into the CPU cache.
53 // 53 //
54 // Using an unsigned char type has a small but measurable performance benefit 54 // Using an unsigned char type has a small but measurable performance benefit
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
168 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), 168 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
169 output); 169 output);
170 } else if (char_value <= 0xffff) { 170 } else if (char_value <= 0xffff) {
171 // 1110xxxx 10xxxxxx 10xxxxxx 171 // 1110xxxx 10xxxxxx 10xxxxxx
172 Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)), 172 Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)),
173 output); 173 output);
174 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)), 174 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
175 output); 175 output);
176 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), 176 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
177 output); 177 output);
178 } else if (char_value <= 0x10FFFF) { // Max unicode code point. 178 } else if (char_value <= 0x10FFFF) { // Max Unicode code point.
179 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 179 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
180 Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)), 180 Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
181 output); 181 output);
182 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)), 182 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)),
183 output); 183 output);
184 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)), 184 Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
185 output); 185 output);
186 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), 186 Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
187 output); 187 output);
188 } else { 188 } else {
189 // Invalid code point (greater than 0x10FFFF); not encodable as UTF-8. 189 // Invalid code point (greater than 0x10FFFF); not encodable as UTF-8.
190 NOTREACHED(); 190 NOTREACHED();
191 } 191 }
192 } 192 }
193 193
194 // Helper used by AppendUTF8Value below. We use an unsigned parameter so there 194 // Helper used by AppendUTF8Value below. We use an unsigned parameter so there
195 // are no funny sign problems with the input, but then have to convert it to 195 // are no funny sign problems with the input, but then have to convert it to
196 // a regular char for appending. 196 // a regular char for appending.
197 inline void AppendCharToOutput(unsigned char ch, CanonOutput* output) { 197 inline void AppendCharToOutput(unsigned char ch, CanonOutput* output) {
198 output->push_back(static_cast<char>(ch)); 198 output->push_back(static_cast<char>(ch));
199 } 199 }
200 200
201 // Writes the given character to the output as UTF-8. This does NO checking 201 // Writes the given character to the output as UTF-8. This does NO checking
202 // of the validity of the unicode characters; the caller should ensure that 202 // of the validity of the Unicode characters; the caller should ensure that
203 // the value it is appending is valid to append. 203 // the value it is appending is valid to append.
204 inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) { 204 inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {
205 DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output); 205 DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output);
206 } 206 }
207 207
208 // Writes the given character to the output as UTF-8, escaping ALL 208 // Writes the given character to the output as UTF-8, escaping ALL
209 // characters (even when they are ASCII). This does NO checking of the 209 // characters (even when they are ASCII). This does NO checking of the
210 // validity of the unicode characters; the caller should ensure that the value 210 // validity of the Unicode characters; the caller should ensure that the value
211 // it is appending is valid to append. 211 // it is appending is valid to append.
212 inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) { 212 inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {
213 DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output); 213 DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output);
214 } 214 }
215 215
216 // UTF-16 functions ----------------------------------------------------------- 216 // UTF-16 functions -----------------------------------------------------------
217 217
218 // Reads one character in UTF-16 starting at |*begin| in |str| and places 218 // Reads one character in UTF-16 starting at |*begin| in |str| and places
219 // the decoded value into |*code_point|. If the character is valid, we will 219 // the decoded value into |*code_point|. If the character is valid, we will
220 // return true. If invalid, we'll return false and put the 220 // return true. If invalid, we'll return false and put the
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
253 // Every single output character will be escaped. This means that if you 253 // Every single output character will be escaped. This means that if you
254 // give it an ASCII character as input, it will be escaped. Some code uses 254 // give it an ASCII character as input, it will be escaped. Some code uses
255 // this when it knows that a character is invalid according to its rules 255 // this when it knows that a character is invalid according to its rules
256 // for validity. If you don't want escaping for ASCII characters, you will 256 // for validity. If you don't want escaping for ASCII characters, you will
257 // have to filter them out prior to calling this function. 257 // have to filter them out prior to calling this function.
258 // 258 //
259 // Assumes that str[*begin] is within range in the array, but does not assume 259 // Assumes that str[*begin] is within range in the array, but does not assume
260 // that any following characters are. 260 // that any following characters are.
261 inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin, 261 inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin,
262 int length, CanonOutput* output) { 262 int length, CanonOutput* output) {
263 // UTF-16 input. Readchar16 will handle invalid characters for us and give 263 // UTF-16 input. ReadUTFChar will handle invalid characters for us and give
264 // us the kUnicodeReplacementCharacter, so we don't have to do special 264 // us the kUnicodeReplacementCharacter, so we don't have to do special
265 // checking after failure, just pass through the failure to the caller. 265 // checking after failure, just pass through the failure to the caller.
266 unsigned char_value; 266 unsigned char_value;
267 bool success = ReadUTFChar(str, begin, length, &char_value); 267 bool success = ReadUTFChar(str, begin, length, &char_value);
268 AppendUTF8EscapedValue(char_value, output); 268 AppendUTF8EscapedValue(char_value, output);
269 return success; 269 return success;
270 } 270 }
271 271
272 // Handles UTF-8 input. See the wide version above for usage. 272 // Handles UTF-8 input. See the wide version above for usage.
273 inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length, 273 inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length,
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after
424 inline unsigned long long _strtoui64(const char* nptr, 424 inline unsigned long long _strtoui64(const char* nptr,
425 char** endptr, int base) { 425 char** endptr, int base) {
426 return strtoull(nptr, endptr, base); 426 return strtoull(nptr, endptr, base);
427 } 427 }
428 428
429 #endif // WIN32 429 #endif // WIN32
430 430
431 } // namespace url 431 } // namespace url
432 432
433 #endif // URL_URL_CANON_INTERNAL_H_ 433 #endif // URL_URL_CANON_INTERNAL_H_
OLDNEW
« url/gurl.h ('K') | « url/url_canon_host.cc ('k') | url/url_canon_internal.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698