OLD | NEW |
1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 13 matching lines...) Expand all Loading... |
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 |
30 #include "googleurl/src/url_canon_ip.h" | 30 #include "googleurl/src/url_canon_ip.h" |
31 | 31 |
32 #include <stdlib.h> | 32 #include <stdlib.h> |
33 | 33 |
| 34 #include "base/logging.h" |
34 #include "googleurl/src/url_canon_internal.h" | 35 #include "googleurl/src/url_canon_internal.h" |
35 | 36 |
36 namespace url_canon { | 37 namespace url_canon { |
37 | 38 |
38 namespace { | 39 namespace { |
39 | 40 |
40 // Converts one of the character types that represent a numerical base to the | 41 // Converts one of the character types that represent a numerical base to the |
41 // corresponding base. | 42 // corresponding base. |
42 int BaseForType(SharedCharTypes type) { | 43 int BaseForType(SharedCharTypes type) { |
43 switch (type) { | 44 switch (type) { |
44 case CHAR_HEX: | 45 case CHAR_HEX: |
45 return 16; | 46 return 16; |
46 case CHAR_DEC: | 47 case CHAR_DEC: |
47 return 10; | 48 return 10; |
48 case CHAR_OCT: | 49 case CHAR_OCT: |
49 return 8; | 50 return 8; |
50 default: | 51 default: |
51 return 0; | 52 return 0; |
52 } | 53 } |
53 } | 54 } |
54 | 55 |
55 template<typename CHAR, typename UCHAR> | 56 template<typename CHAR, typename UCHAR> |
56 bool DoFindIPv4Components(const CHAR* spec, | 57 bool DoFindIPv4Components(const CHAR* spec, |
57 const url_parse::Component& host, | 58 const url_parse::Component& host, |
58 url_parse::Component components[4]) { | 59 url_parse::Component components[4]) { |
59 int cur_component = 0; // Index of the component we're working on. | 60 int cur_component = 0; // Index of the component we're working on. |
60 int cur_component_begin = host.begin; // Start of the current component. | 61 int cur_component_begin = host.begin; // Start of the current component. |
61 int end = host.end(); | 62 int end = host.end(); |
62 for (int i = host.begin; /* nothing */; i++) { | 63 for (int i = host.begin; /* nothing */; i++) { |
63 if (i == end || spec[i] == '.') { | 64 if (i == end || spec[i] == '.') { |
64 // Found the end of the current component. | 65 // Found the end of the current component. |
65 int component_len = i - cur_component_begin; | 66 int component_len = i - cur_component_begin; |
66 components[cur_component] = | 67 components[cur_component] = |
67 url_parse::Component(cur_component_begin, component_len); | 68 url_parse::Component(cur_component_begin, component_len); |
68 | 69 |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
172 | 173 |
173 for (int ch = 0; str[ch] != 0; ch++) | 174 for (int ch = 0; str[ch] != 0; ch++) |
174 output->push_back(str[ch]); | 175 output->push_back(str[ch]); |
175 | 176 |
176 if (i != 3) | 177 if (i != 3) |
177 output->push_back('.'); | 178 output->push_back('.'); |
178 } | 179 } |
179 out_host->len = output->length() - out_host->begin; | 180 out_host->len = output->length() - out_host->begin; |
180 } | 181 } |
181 | 182 |
182 template<typename CHAR, typename UCHAR> | 183 // Converts an IPv4 address to a 32-bit number (network byte order), returning |
183 bool DoCanonicalizeIPv4Address(const CHAR* spec, | 184 // true on success. False means that the input is not a valid IPv4 address. |
184 const url_parse::Component& host, | 185 template<typename CHAR> |
185 CanonOutput* output, | 186 bool IPv4AddressToNumber(const CHAR* spec, |
186 url_parse::Component* out_host) { | 187 const url_parse::Component& host, |
| 188 unsigned char address[4]) { |
187 // The identified components. Not all may exist. | 189 // The identified components. Not all may exist. |
188 url_parse::Component components[4]; | 190 url_parse::Component components[4]; |
189 if (!FindIPv4Components(spec, host, components)) | 191 if (!FindIPv4Components(spec, host, components)) |
190 return false; | 192 return false; |
191 | 193 |
192 // Convert existing components to digits. Values up to | 194 // Convert existing components to digits. Values up to |
193 // |existing_components| will be valid. | 195 // |existing_components| will be valid. |
194 uint32_t component_values[4]; | 196 uint32_t component_values[4]; |
195 int existing_components = 0; | 197 int existing_components = 0; |
196 for (int i = 0; i < 4; i++) { | 198 for (int i = 0; i < 4; i++) { |
197 if (components[i].len <= 0) | 199 if (components[i].len <= 0) |
198 continue; | 200 continue; |
199 if (!IPv4ComponentToNumber(spec, components[i], | 201 if (!IPv4ComponentToNumber(spec, components[i], |
200 &component_values[existing_components])) | 202 &component_values[existing_components])) |
201 return false; | 203 return false; |
202 existing_components++; | 204 existing_components++; |
203 } | 205 } |
204 | 206 |
205 // Use that sequence of numbers to fill out the 4-component IP address. | 207 // Use that sequence of numbers to fill out the 4-component IP address. |
206 unsigned char address[4]; | |
207 | 208 |
208 // ...first fill all but the last component by truncating to one byte. | 209 // ...first fill all but the last component by truncating to one byte. |
209 for (int i = 0; i < existing_components - 1; i++) | 210 for (int i = 0; i < existing_components - 1; i++) |
210 address[i] = static_cast<unsigned char>(component_values[i]); | 211 address[i] = static_cast<unsigned char>(component_values[i]); |
211 | 212 |
212 // ...then fill out the rest of the bytes by filling them with the last | 213 // ...then fill out the rest of the bytes by filling them with the last |
213 // component. | 214 // component. |
214 uint32_t last_value = component_values[existing_components - 1]; | 215 uint32_t last_value = component_values[existing_components - 1]; |
215 if (existing_components == 1) | 216 if (existing_components == 1) |
216 address[0] = (last_value & 0xFF000000) >> 24; | 217 address[0] = (last_value & 0xFF000000) >> 24; |
217 if (existing_components <= 2) | 218 if (existing_components <= 2) |
218 address[1] = (last_value & 0x00FF0000) >> 16; | 219 address[1] = (last_value & 0x00FF0000) >> 16; |
219 if (existing_components <= 3) | 220 if (existing_components <= 3) |
220 address[2] = (last_value & 0x0000FF00) >> 8; | 221 address[2] = (last_value & 0x0000FF00) >> 8; |
221 address[3] = last_value & 0xFF; | 222 address[3] = last_value & 0xFF; |
222 | 223 |
| 224 return true; |
| 225 } |
| 226 |
| 227 template<typename CHAR, typename UCHAR> |
| 228 bool DoCanonicalizeIPv4Address(const CHAR* spec, |
| 229 const url_parse::Component& host, |
| 230 CanonOutput* output, |
| 231 url_parse::Component* out_host) { |
| 232 unsigned char address[4]; |
| 233 if (!IPv4AddressToNumber<CHAR>(spec, host, address)) |
| 234 return false; |
| 235 |
223 AppendIPv4Address(address, output, out_host); | 236 AppendIPv4Address(address, output, out_host); |
224 return true; | 237 return true; |
225 } | 238 } |
226 | 239 |
227 // This function does NO canonicalization. It does _some_ validation | 240 // Helper class that describes the main components of an IPv6 input string. |
228 // and then copies the component as is to the output. | 241 // See the following examples to understand how it breaks up an input string: |
229 // TODO: Actual canonicalization! | 242 // |
| 243 // [Example 1]: input = "[::aa:bb]" |
| 244 // ==> num_hex_components = 2 |
| 245 // ==> hex_components[0] = Component(3,2) "aa" |
| 246 // ==> hex_components[1] = Component(6,2) "bb" |
| 247 // ==> index_of_contraction = 0 |
| 248 // ==> ipv4_component = Component(0, -1) |
| 249 // |
| 250 // [Example 2]: input = "[1:2::3:4:5]" |
| 251 // ==> num_hex_components = 5 |
| 252 // ==> hex_components[0] = Component(1,1) "1" |
| 253 // ==> hex_components[1] = Component(3,1) "2" |
| 254 // ==> hex_components[2] = Component(6,1) "3" |
| 255 // ==> hex_components[3] = Component(8,1) "4" |
| 256 // ==> hex_components[4] = Component(10,1) "5" |
| 257 // ==> index_of_contraction = 2 |
| 258 // ==> ipv4_component = Component(0, -1) |
| 259 // |
| 260 // [Example 3]: input = "[::ffff:192.168.0.1]" |
| 261 // ==> num_hex_components = 1 |
| 262 // ==> hex_components[0] = Component(3,4) "ffff" |
| 263 // ==> index_of_contraction = 0 |
| 264 // ==> ipv4_component = Component(8, 11) "192.168.0.1" |
| 265 // |
| 266 // [Example 4]: input = "[1::]" |
| 267 // ==> num_hex_components = 1 |
| 268 // ==> hex_components[0] = Component(1,1) "1" |
| 269 // ==> index_of_contraction = 1 |
| 270 // ==> ipv4_component = Component(0, -1) |
| 271 // |
| 272 // [Example 5]: input = "[::192.168.0.1]" |
| 273 // ==> num_hex_components = 0 |
| 274 // ==> index_of_contraction = 0 |
| 275 // ==> ipv4_component = Component(3, 11) "192.168.0.1" |
| 276 // |
| 277 struct IPv6Parsed { |
| 278 // Zero-out the parse information. |
| 279 void reset() { |
| 280 num_hex_components = 0; |
| 281 index_of_contraction = -1; |
| 282 ipv4_component.reset(); |
| 283 } |
| 284 |
| 285 // There can be up to 8 hex components (colon separated) in the literal. |
| 286 url_parse::Component hex_components[8]; |
| 287 |
| 288 // The count of hex components present. Ranges from [0,8]. |
| 289 int num_hex_components; |
| 290 |
| 291 // The index of the hex component that the "::" contraction precedes, or |
| 292 // -1 if there is no contraction. |
| 293 int index_of_contraction; |
| 294 |
| 295 // The range of characters which are an IPv4 literal. |
| 296 url_parse::Component ipv4_component; |
| 297 }; |
| 298 |
| 299 // Parse the IPv6 input string. If parsing succeeded returns true and fills |
| 300 // |parsed| with the information. If parsing failed (because the input is |
| 301 // invalid) returns false. |
| 302 template<typename CHAR, typename UCHAR> |
| 303 bool DoParseIPv6(const CHAR* spec, |
| 304 const url_parse::Component& host, |
| 305 IPv6Parsed* parsed) { |
| 306 // Zero-out the info. |
| 307 parsed->reset(); |
| 308 |
| 309 if (!host.is_nonempty()) |
| 310 return false; |
| 311 |
| 312 // The index for start and end of address range (no brackets). |
| 313 int begin = host.begin; |
| 314 int end = host.end(); |
| 315 |
| 316 int cur_component_begin = begin; // Start of the current component. |
| 317 |
| 318 // Scan through the input, searching for hex components, "::" contractions, |
| 319 // and IPv4 components. |
| 320 for (int i = begin; /* i <= end */; i++) { |
| 321 bool is_colon = spec[i] == ':'; |
| 322 bool is_contraction = is_colon && i < end - 1 && spec[i + 1] == ':'; |
| 323 |
| 324 // We reached the end of the current component if we encounter a colon |
| 325 // (separator between hex components, or start of a contraction), or end of |
| 326 // input. |
| 327 if (is_colon || i == end) { |
| 328 int component_len = i - cur_component_begin; |
| 329 |
| 330 // A component should not have more than 4 hex digits. |
| 331 if (component_len > 4) |
| 332 return false; |
| 333 |
| 334 // Don't allow empty components. |
| 335 if (component_len == 0) { |
| 336 // The exception is when contractions appear at beginning of the |
| 337 // input or at the end of the input. |
| 338 if (!((is_contraction && i == begin) || (i == end && |
| 339 parsed->index_of_contraction == parsed->num_hex_components))) |
| 340 return false; |
| 341 } |
| 342 |
| 343 // Add the hex component we just found to running list. |
| 344 if (component_len > 0) { |
| 345 // Can't have more than 8 components! |
| 346 if (parsed->num_hex_components >= 8) |
| 347 return false; |
| 348 |
| 349 parsed->hex_components[parsed->num_hex_components++] = |
| 350 url_parse::Component(cur_component_begin, component_len); |
| 351 } |
| 352 } |
| 353 |
| 354 if (i == end) |
| 355 break; // Reached the end of the input, DONE. |
| 356 |
| 357 // We found a "::" contraction. |
| 358 if (is_contraction) { |
| 359 // There can be at most one contraction in the literal. |
| 360 if (parsed->index_of_contraction != -1) |
| 361 return false; |
| 362 parsed->index_of_contraction = parsed->num_hex_components; |
| 363 ++i; // Consume the colon we peeked. |
| 364 } |
| 365 |
| 366 if (is_colon) { |
| 367 // Colons are separators between components, keep track of where the |
| 368 // current component started (after this colon). |
| 369 cur_component_begin = i + 1; |
| 370 } else { |
| 371 if (static_cast<UCHAR>(spec[i]) >= 0x80) |
| 372 return false; // Not ASCII. |
| 373 |
| 374 if (!IsHexChar(static_cast<unsigned char>(spec[i]))) { |
| 375 // Regular components are hex numbers. It is also possible for |
| 376 // a component to be an IPv4 address in dotted form. |
| 377 if (IsIPv4Char(static_cast<unsigned char>(spec[i]))) { |
| 378 // Since IPv4 address can only appear at the end, assume the rest |
| 379 // of the string is an IPv4 address. (We will parse this separately |
| 380 // later). |
| 381 parsed->ipv4_component = url_parse::Component( |
| 382 cur_component_begin, end - cur_component_begin); |
| 383 break; |
| 384 } else { |
| 385 // The character was neither a hex digit, nor an IPv4 character. |
| 386 return false; |
| 387 } |
| 388 } |
| 389 } |
| 390 } |
| 391 |
| 392 return true; |
| 393 } |
| 394 |
| 395 // Verifies the parsed IPv6 information, checking that the various components |
| 396 // add up to the right number of bits (hex components are 16 bits, while |
| 397 // embedded IPv4 formats are 32 bits, and contractions are placeholders for |
| 398 // 16 or more bits). Returns true if sizes match up, false otherwise. On |
| 399 // success writes the length of the contraction (if any) to |
| 400 // |out_num_bytes_of_contraction|. |
| 401 bool CheckIPv6ComponentsSize(const IPv6Parsed& parsed, |
| 402 int* out_num_bytes_of_contraction) { |
| 403 // Each group of four hex digits contributes 16 bits. |
| 404 int num_bytes_without_contraction = parsed.num_hex_components * 2; |
| 405 |
| 406 // If an IPv4 address was embedded at the end, it contributes 32 bits. |
| 407 if (parsed.ipv4_component.is_valid()) |
| 408 num_bytes_without_contraction += 4; |
| 409 |
| 410 // If there was a "::" contraction, its size is going to be: |
| 411 // MAX([16bits], [128bits] - num_bytes_without_contraction). |
| 412 int num_bytes_of_contraction = 0; |
| 413 if (parsed.index_of_contraction != -1) { |
| 414 num_bytes_of_contraction = 16 - num_bytes_without_contraction; |
| 415 if (num_bytes_of_contraction < 2) |
| 416 num_bytes_of_contraction = 2; |
| 417 } |
| 418 |
| 419 // Check that the numbers add up. |
| 420 if (num_bytes_without_contraction + num_bytes_of_contraction != 16) |
| 421 return false; |
| 422 |
| 423 *out_num_bytes_of_contraction = num_bytes_of_contraction; |
| 424 return true; |
| 425 } |
| 426 |
| 427 // Converts a hex component into a number. This cannot fail since the caller has |
| 428 // already verified that each character in the string was a hex digit, and |
| 429 // that there were no more than 4 characters. |
| 430 template<typename CHAR> |
| 431 uint16_t IPv6HexComponentToNumber(const CHAR* spec, |
| 432 const url_parse::Component& component) { |
| 433 DCHECK(component.len <= 4); |
| 434 |
| 435 // Copy the hex string into a C-string. |
| 436 char buf[5]; |
| 437 for (int i = 0; i < component.len; ++i) |
| 438 buf[i] = static_cast<char>(spec[component.begin + i]); |
| 439 buf[component.len] = '\0'; |
| 440 |
| 441 // Convert it to a number (overflow is not possible, since with 4 hex |
| 442 // characters we can at most have a 16 bit number). |
| 443 return static_cast<uint16_t>(_strtoui64(buf, NULL, 16)); |
| 444 } |
| 445 |
| 446 // Converts an IPv6 address to a 128-bit number (network byte order), returning |
| 447 // true on success. False means that the input was not a valid IPv6 address. |
| 448 template<typename CHAR, typename UCHAR> |
| 449 bool IPv6AddressToNumber(const CHAR* spec, |
| 450 const url_parse::Component& host, |
| 451 unsigned char address[16]) { |
| 452 // Make sure the component is bounded by '[' and ']'. |
| 453 int end = host.end(); |
| 454 if (!host.is_nonempty() || spec[host.begin] != '[' || spec[end - 1] != ']') |
| 455 return false; |
| 456 |
| 457 // Exclude the square brackets. |
| 458 url_parse::Component ipv6_comp(host.begin + 1, host.len - 2); |
| 459 |
| 460 // Parse the IPv6 address -- identify where all the colon separated hex |
| 461 // components are, the "::" contraction, and the embedded IPv4 address. |
| 462 IPv6Parsed ipv6_parsed; |
| 463 if (!DoParseIPv6<CHAR, UCHAR>(spec, ipv6_comp, &ipv6_parsed)) |
| 464 return false; |
| 465 |
| 466 // Do some basic size checks to make sure that the address doesn't |
| 467 // specify more than 128 bits or fewer than 128 bits. This also resolves |
| 468 // how many zero bytes the "::" contraction represents. |
| 469 int num_bytes_of_contraction; |
| 470 if (!CheckIPv6ComponentsSize(ipv6_parsed, &num_bytes_of_contraction)) |
| 471 return false; |
| 472 |
| 473 int cur_index_in_address = 0; |
| 474 |
| 475 // Loop through each hex component, and the contraction, in order. |
| 476 for (int i = 0; i <= ipv6_parsed.num_hex_components; ++i) { |
| 477 // Append the contraction if it appears before this component. |
| 478 if (i == ipv6_parsed.index_of_contraction) { |
| 479 for (int j = 0; j < num_bytes_of_contraction; ++j) |
| 480 address[cur_index_in_address++] = 0; |
| 481 } |
| 482 // Append the hex component's value. |
| 483 if (i != ipv6_parsed.num_hex_components) { |
| 484 // Get the 16-bit value for this hex component. |
| 485 uint16_t number = IPv6HexComponentToNumber<CHAR>( |
| 486 spec, ipv6_parsed.hex_components[i]); |
| 487 // Append to |address|, in network byte order. |
| 488 address[cur_index_in_address++] = (number & 0xFF00) >> 8; |
| 489 address[cur_index_in_address++] = (number & 0x00FF); |
| 490 } |
| 491 } |
| 492 |
| 493 // If there was an IPv4 section, convert it into a 32-bit number and append |
| 494 // it to |address|. |
| 495 if (ipv6_parsed.ipv4_component.is_valid()) { |
| 496 // We only allow the embedded IPv4 syntax to be used for "compat" and |
| 497 // "mapped" formats: |
| 498 // "mapped" ==> 0:0:0:0:0:ffff:<IPv4-literal> |
| 499 // "compat" ==> 0:0:0:0:0:0000:<IPv4-literal> |
| 500 for (int j = 0; j < 10; ++j) { |
| 501 if (address[j] != 0) |
| 502 return false; |
| 503 } |
| 504 if (!((address[10] == 0 && address[11] == 0) || |
| 505 (address[10] == 0xFF && address[11] == 0xFF))) |
| 506 return false; |
| 507 |
| 508 // Append the 32-bit number to |address|. |
| 509 if (!IPv4AddressToNumber(spec, |
| 510 ipv6_parsed.ipv4_component, |
| 511 &address[cur_index_in_address])) |
| 512 return false; |
| 513 } |
| 514 |
| 515 return true; |
| 516 } |
| 517 |
| 518 // Searches for the longest sequence of zeros in |address|, and writes the |
| 519 // range into |contraction_range|. The run of zeros must be longer than 16 bits |
| 520 // (two or more consecutive zero groups), and if there is a tie the first is chosen. |
| 521 void ChooseIPv6ContractionRange(const unsigned char address[16], |
| 522 url_parse::Component* contraction_range) { |
| 523 // The longest run of zeros in |address| seen so far. |
| 524 url_parse::Component max_range; |
| 525 |
| 526 // The current run of zeros in |address| being iterated over. |
| 527 url_parse::Component cur_range; |
| 528 |
| 529 for (int i = 0; i < 16; i += 2) { |
| 530 // Test for 16 bits worth of zero. |
| 531 bool is_zero = (address[i] == 0 && address[i + 1] == 0); |
| 532 |
| 533 if (is_zero) { |
| 534 // Add the zero to the current range (or start a new one). |
| 535 if (!cur_range.is_valid()) |
| 536 cur_range = url_parse::Component(i, 0); |
| 537 cur_range.len += 2; |
| 538 } |
| 539 |
| 540 if (!is_zero || i == 14) { |
| 541 // Just completed a run of zeros. If the run is greater than 16 bits, |
| 542 // it is a candidate for the contraction. |
| 543 if (cur_range.len > 2 && cur_range.len > max_range.len) { |
| 544 max_range = cur_range; |
| 545 } |
| 546 cur_range.reset(); |
| 547 } |
| 548 } |
| 549 *contraction_range = max_range; |
| 550 } |
| 551 |
230 template<typename CHAR, typename UCHAR> | 552 template<typename CHAR, typename UCHAR> |
231 bool DoCanonicalizeIPv6Address(const CHAR* spec, | 553 bool DoCanonicalizeIPv6Address(const CHAR* spec, |
232 const url_parse::Component& host, | 554 const url_parse::Component& host, |
233 CanonOutput* output, | 555 CanonOutput* output, |
234 url_parse::Component* out_host) { | 556 url_parse::Component* out_host) { |
235 // Make sure the component is bounded by '[' and ']'. | 557 // Turn the IP address into a 128 bit number. |
236 int end = host.end(); | 558 unsigned char address[16]; |
237 if (!host.is_nonempty() || spec[host.begin] != '[' || spec[end - 1] != ']') | 559 if (!IPv6AddressToNumber<CHAR, UCHAR>(spec, host, address)) |
238 return false; | 560 return false; |
239 | 561 |
240 int num_colons = 0; | 562 out_host->begin = output->length(); |
241 int num_dots = 0; | 563 output->push_back('['); |
242 int num_hex = 0; | 564 |
243 for (int i = host.begin + 1; i < end - 1; i++) { | 565 // We will now output the address according to the rules in: |
244 if (static_cast<UCHAR>(spec[i]) >= 0x80) | 566 // http://tools.ietf.org/html/draft-kawamura-ipv6-text-representation-01#section-4 |
245 return false; | 567 |
246 | 568 // Start by finding where to place the "::" contraction (if any). |
247 unsigned char u = static_cast<unsigned char>(spec[i]); | 569 url_parse::Component contraction_range; |
248 if (IsHexChar(u)) { | 570 ChooseIPv6ContractionRange(address, &contraction_range); |
249 // No block between ':'s can be more than 4 hex characters. | 571 |
250 if (num_hex > 3) | 572 for (int i = 0; i < 16;) { |
251 return false; | 573 if (i == contraction_range.begin && contraction_range.len > 0) { |
252 num_hex++; | 574 // Jump over the contraction. |
253 } else if (u == ':') { | 575 if (i == 0) |
254 // No ':'s can appear after '.'s have appeared and there can be no | 576 output->push_back(':'); |
255 // more than 7 ':'s separating the 8 hex shorts. | 577 output->push_back(':'); |
256 if (num_dots > 0 || num_colons > 6) | 578 i = contraction_range.end(); |
257 return false; | |
258 num_colons++; | |
259 num_hex = 0; | |
260 } else if (u == '.') { | |
261 // No hex chars between ':'s is fine (signifies successive | |
262 // zeroed shorts concatentated, but can only be used once). Not | |
263 // valid for embedded IPv4 addresses, however. | |
264 if (num_hex < 1) | |
265 return false; | |
266 num_dots++; | |
267 num_hex = 0; | |
268 } else { | 579 } else { |
269 // Invalid characters for an IPv6 address. | 580 // Consume the next 16 bits from |address|. |
270 return false; | 581 int x = address[i] << 8 | address[i + 1]; |
271 } | 582 |
272 } | 583 i += 2; |
273 if (num_colons < 2) | 584 |
274 return false; | 585 // Stringify the 16 bit number (at most requires 4 hex digits). |
275 if (num_dots != 0 && num_dots != 3) | 586 char str[5]; |
276 return false; | 587 _itoa_s(x, str, 16); |
277 | 588 for (int ch = 0; str[ch] != 0; ++ch) |
278 // This passed all the checks thus far, so just copy input to output. | 589 output->push_back(str[ch]); |
279 // NOTE: It may still be invalid, and it's definitely not canonicalized. | 590 |
280 // TODO: Actually canonicalize. | 591 // Put a colon after each number, except the last. |
281 out_host->begin = output->length(); | 592 if (i < 16) |
282 for (int i = host.begin; i < end; i++) | 593 output->push_back(':'); |
283 output->push_back(static_cast<char>(spec[i])); | 594 } |
| 595 } |
| 596 |
| 597 output->push_back(']'); |
284 out_host->len = output->length() - out_host->begin; | 598 out_host->len = output->length() - out_host->begin; |
285 return true; | 599 |
286 } | 600 return true; |
287 | 601 } |
| 602 |
288 } // namespace | 603 } // namespace |
289 | 604 |
290 bool FindIPv4Components(const char* spec, | 605 bool FindIPv4Components(const char* spec, |
291 const url_parse::Component& host, | 606 const url_parse::Component& host, |
292 url_parse::Component components[4]) { | 607 url_parse::Component components[4]) { |
293 return DoFindIPv4Components<char, unsigned char>(spec, host, components); | 608 return DoFindIPv4Components<char, unsigned char>(spec, host, components); |
294 } | 609 } |
295 | 610 |
296 bool FindIPv4Components(const char16* spec, | 611 bool FindIPv4Components(const char16* spec, |
297 const url_parse::Component& host, | 612 const url_parse::Component& host, |
(...skipping 17 matching lines...) Expand all Loading... |
315 CanonOutput* output, | 630 CanonOutput* output, |
316 url_parse::Component* out_host) { | 631 url_parse::Component* out_host) { |
317 return | 632 return |
318 DoCanonicalizeIPv4Address<char16, char16>( | 633 DoCanonicalizeIPv4Address<char16, char16>( |
319 spec, host, output, out_host) || | 634 spec, host, output, out_host) || |
320 DoCanonicalizeIPv6Address<char16, char16>( | 635 DoCanonicalizeIPv6Address<char16, char16>( |
321 spec, host, output, out_host); | 636 spec, host, output, out_host); |
322 } | 637 } |
323 | 638 |
324 } // namespace url_canon | 639 } // namespace url_canon |
OLD | NEW |