| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "url/url_canon_ip.h" | 5 #include "url/url_canon_ip.h" |
| 6 | 6 |
| 7 #include <stdint.h> |
| 7 #include <stdlib.h> | 8 #include <stdlib.h> |
| 9 #include <limits> |
| 8 | 10 |
| 9 #include "base/basictypes.h" | |
| 10 #include "base/logging.h" | 11 #include "base/logging.h" |
| 11 #include "url/url_canon_internal.h" | 12 #include "url/url_canon_internal.h" |
| 12 | 13 |
| 13 namespace url { | 14 namespace url { |
| 14 | 15 |
| 15 namespace { | 16 namespace { |
| 16 | 17 |
| 17 // Converts one of the character types that represent a numerical base to the | 18 // Converts one of the character types that represent a numerical base to the |
| 18 // corresponding base. | 19 // corresponding base. |
| 19 int BaseForType(SharedCharTypes type) { | 20 int BaseForType(SharedCharTypes type) { |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 85 // - IPV4 - The number was valid, and did not overflow. | 86 // - IPV4 - The number was valid, and did not overflow. |
| 86 // - BROKEN - The input was numeric, but too large for a 32-bit field. | 87 // - BROKEN - The input was numeric, but too large for a 32-bit field. |
| 87 // - NEUTRAL - Input was not numeric. | 88 // - NEUTRAL - Input was not numeric. |
| 88 // | 89 // |
| 89 // The input is assumed to be ASCII. FindIPv4Components should have stripped | 90 // The input is assumed to be ASCII. FindIPv4Components should have stripped |
| 90 // out any input that is greater than 7 bits. The components are assumed | 91 // out any input that is greater than 7 bits. The components are assumed |
| 91 // to be non-empty. | 92 // to be non-empty. |
| 92 template<typename CHAR> | 93 template<typename CHAR> |
| 93 CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec, | 94 CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec, |
| 94 const Component& component, | 95 const Component& component, |
| 95 uint32* number) { | 96 uint32_t* number) { |
| 96 // Figure out the base | 97 // Figure out the base |
| 97 SharedCharTypes base; | 98 SharedCharTypes base; |
| 98 int base_prefix_len = 0; // Size of the prefix for this base. | 99 int base_prefix_len = 0; // Size of the prefix for this base. |
| 99 if (spec[component.begin] == '0') { | 100 if (spec[component.begin] == '0') { |
| 100 // Either hex or dec, or a standalone zero. | 101 // Either hex or dec, or a standalone zero. |
| 101 if (component.len == 1) { | 102 if (component.len == 1) { |
| 102 base = CHAR_DEC; | 103 base = CHAR_DEC; |
| 103 } else if (spec[component.begin + 1] == 'X' || | 104 } else if (spec[component.begin + 1] == 'X' || |
| 104 spec[component.begin + 1] == 'x') { | 105 spec[component.begin + 1] == 'x') { |
| 105 base = CHAR_HEX; | 106 base = CHAR_HEX; |
| 106 base_prefix_len = 2; | 107 base_prefix_len = 2; |
| 107 } else { | 108 } else { |
| 108 base = CHAR_OCT; | 109 base = CHAR_OCT; |
| 109 base_prefix_len = 1; | 110 base_prefix_len = 1; |
| 110 } | 111 } |
| 111 } else { | 112 } else { |
| 112 base = CHAR_DEC; | 113 base = CHAR_DEC; |
| 113 } | 114 } |
| 114 | 115 |
| 115 // Extend the prefix to consume all leading zeros. | 116 // Extend the prefix to consume all leading zeros. |
| 116 while (base_prefix_len < component.len && | 117 while (base_prefix_len < component.len && |
| 117 spec[component.begin + base_prefix_len] == '0') | 118 spec[component.begin + base_prefix_len] == '0') |
| 118 base_prefix_len++; | 119 base_prefix_len++; |
| 119 | 120 |
| 120 // Put the component, minus any base prefix, into a NULL-terminated buffer so | 121 // Put the component, minus any base prefix, into a NULL-terminated buffer so |
| 121 // we can call the standard library. Because leading zeros have already been | 122 // we can call the standard library. Because leading zeros have already been |
| 122 // discarded, filling the entire buffer is guaranteed to trigger the 32-bit | 123 // discarded, filling the entire buffer is guaranteed to trigger the 32-bit |
| 123 // overflow check. | 124 // overflow check. |
| 124 const int kMaxComponentLen = 16; | 125 const int kMaxComponentLen = 16; |
| 125 char buf[kMaxComponentLen + 1]; // digits + '\0' | 126 char buf[kMaxComponentLen + 1]; // digits + '\0' |
| 126 int dest_i = 0; | 127 int dest_i = 0; |
| 127 for (int i = component.begin + base_prefix_len; i < component.end(); i++) { | 128 for (int i = component.begin + base_prefix_len; i < component.end(); i++) { |
| 128 // We know the input is 7-bit, so convert to narrow (if this is the wide | 129 // We know the input is 7-bit, so convert to narrow (if this is the wide |
| 129 // version of the template) by casting. | 130 // version of the template) by casting. |
| 130 char input = static_cast<char>(spec[i]); | 131 char input = static_cast<char>(spec[i]); |
| 131 | 132 |
| 132 // Validate that this character is OK for the given base. | 133 // Validate that this character is OK for the given base. |
| 133 if (!IsCharOfType(input, base)) | 134 if (!IsCharOfType(input, base)) |
| 134 return CanonHostInfo::NEUTRAL; | 135 return CanonHostInfo::NEUTRAL; |
| 135 | 136 |
| 136 // Fill the buffer, if there's space remaining. This check allows us to | 137 // Fill the buffer, if there's space remaining. This check allows us to |
| 137 // verify that all characters are numeric, even those that don't fit. | 138 // verify that all characters are numeric, even those that don't fit. |
| 138 if (dest_i < kMaxComponentLen) | 139 if (dest_i < kMaxComponentLen) |
| 139 buf[dest_i++] = input; | 140 buf[dest_i++] = input; |
| 140 } | 141 } |
| 141 | 142 |
| 142 buf[dest_i] = '\0'; | 143 buf[dest_i] = '\0'; |
| 143 | 144 |
| 144 // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal | 145 // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal |
| 145 // number can overflow a 64-bit number in <= 16 characters). | 146 // number can overflow a 64-bit number in <= 16 characters). |
| 146 uint64 num = _strtoui64(buf, NULL, BaseForType(base)); | 147 uint64_t num = _strtoui64(buf, NULL, BaseForType(base)); |
| 147 | 148 |
| 148 // Check for 32-bit overflow. | 149 // Check for 32-bit overflow. |
| 149 if (num > kuint32max) | 150 if (num > std::numeric_limits<uint32_t>::max()) |
| 150 return CanonHostInfo::BROKEN; | 151 return CanonHostInfo::BROKEN; |
| 151 | 152 |
| 152 // No overflow. Success! | 153 // No overflow. Success! |
| 153 *number = static_cast<uint32>(num); | 154 *number = static_cast<uint32_t>(num); |
| 154 return CanonHostInfo::IPV4; | 155 return CanonHostInfo::IPV4; |
| 155 } | 156 } |
| 156 | 157 |
| 157 // See declaration of IPv4AddressToNumber for documentation. | 158 // See declaration of IPv4AddressToNumber for documentation. |
| 158 template<typename CHAR> | 159 template<typename CHAR> |
| 159 CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec, | 160 CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec, |
| 160 const Component& host, | 161 const Component& host, |
| 161 unsigned char address[4], | 162 unsigned char address[4], |
| 162 int* num_ipv4_components) { | 163 int* num_ipv4_components) { |
| 163 // The identified components. Not all may exist. | 164 // The identified components. Not all may exist. |
| 164 Component components[4]; | 165 Component components[4]; |
| 165 if (!FindIPv4Components(spec, host, components)) | 166 if (!FindIPv4Components(spec, host, components)) |
| 166 return CanonHostInfo::NEUTRAL; | 167 return CanonHostInfo::NEUTRAL; |
| 167 | 168 |
| 168 // Convert existing components to digits. Values up to | 169 // Convert existing components to digits. Values up to |
| 169 // |existing_components| will be valid. | 170 // |existing_components| will be valid. |
| 170 uint32 component_values[4]; | 171 uint32_t component_values[4]; |
| 171 int existing_components = 0; | 172 int existing_components = 0; |
| 172 | 173 |
| 173 // Set to true if one or more components are BROKEN. BROKEN is only | 174 // Set to true if one or more components are BROKEN. BROKEN is only |
| 174 // returned if all components are IPV4 or BROKEN, so, for example, | 175 // returned if all components are IPV4 or BROKEN, so, for example, |
| 175 // 12345678912345.de returns NEUTRAL rather than broken. | 176 // 12345678912345.de returns NEUTRAL rather than broken. |
| 176 bool broken = false; | 177 bool broken = false; |
| 177 for (int i = 0; i < 4; i++) { | 178 for (int i = 0; i < 4; i++) { |
| 178 if (components[i].len <= 0) | 179 if (components[i].len <= 0) |
| 179 continue; | 180 continue; |
| 180 CanonHostInfo::Family family = IPv4ComponentToNumber( | 181 CanonHostInfo::Family family = IPv4ComponentToNumber( |
| 181 spec, components[i], &component_values[existing_components]); | 182 spec, components[i], &component_values[existing_components]); |
| 182 | 183 |
| 183 if (family == CanonHostInfo::BROKEN) { | 184 if (family == CanonHostInfo::BROKEN) { |
| 184 broken = true; | 185 broken = true; |
| 185 } else if (family != CanonHostInfo::IPV4) { | 186 } else if (family != CanonHostInfo::IPV4) { |
| 186 // Stop if we hit a non-BROKEN invalid non-empty component. | 187 // Stop if we hit a non-BROKEN invalid non-empty component. |
| 187 return family; | 188 return family; |
| 188 } | 189 } |
| 189 | 190 |
| 190 existing_components++; | 191 existing_components++; |
| 191 } | 192 } |
| 192 | 193 |
| 193 if (broken) | 194 if (broken) |
| 194 return CanonHostInfo::BROKEN; | 195 return CanonHostInfo::BROKEN; |
| 195 | 196 |
| 196 // Use that sequence of numbers to fill out the 4-component IP address. | 197 // Use that sequence of numbers to fill out the 4-component IP address. |
| 197 | 198 |
| 198 // First, process all components but the last, while making sure each fits | 199 // First, process all components but the last, while making sure each fits |
| 199 // within an 8-bit field. | 200 // within an 8-bit field. |
| 200 for (int i = 0; i < existing_components - 1; i++) { | 201 for (int i = 0; i < existing_components - 1; i++) { |
| 201 if (component_values[i] > kuint8max) | 202 if (component_values[i] > std::numeric_limits<uint8_t>::max()) |
| 202 return CanonHostInfo::BROKEN; | 203 return CanonHostInfo::BROKEN; |
| 203 address[i] = static_cast<unsigned char>(component_values[i]); | 204 address[i] = static_cast<unsigned char>(component_values[i]); |
| 204 } | 205 } |
| 205 | 206 |
| 206 // Next, consume the last component to fill in the remaining bytes. | 207 // Next, consume the last component to fill in the remaining bytes. |
| 207 // Work around a gcc 4.9 bug. crbug.com/392872 | 208 // Work around a gcc 4.9 bug. crbug.com/392872 |
| 208 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) | 209 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) |
| 209 #pragma GCC diagnostic push | 210 #pragma GCC diagnostic push |
| 210 #pragma GCC diagnostic ignored "-Warray-bounds" | 211 #pragma GCC diagnostic ignored "-Warray-bounds" |
| 211 #endif | 212 #endif |
| 212 uint32 last_value = component_values[existing_components - 1]; | 213 uint32_t last_value = component_values[existing_components - 1]; |
| 213 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) | 214 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) |
| 214 #pragma GCC diagnostic pop | 215 #pragma GCC diagnostic pop |
| 215 #endif | 216 #endif |
| 216 for (int i = 3; i >= existing_components - 1; i--) { | 217 for (int i = 3; i >= existing_components - 1; i--) { |
| 217 address[i] = static_cast<unsigned char>(last_value); | 218 address[i] = static_cast<unsigned char>(last_value); |
| 218 last_value >>= 8; | 219 last_value >>= 8; |
| 219 } | 220 } |
| 220 | 221 |
| 221 // If the last component has residual bits, report overflow. | 222 // If the last component has residual bits, report overflow. |
| 222 if (last_value != 0) | 223 if (last_value != 0) |
| (...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 433 } | 434 } |
| 434 | 435 |
| 435 // Check that the numbers add up. | 436 // Check that the numbers add up. |
| 436 if (num_bytes_without_contraction + num_bytes_of_contraction != 16) | 437 if (num_bytes_without_contraction + num_bytes_of_contraction != 16) |
| 437 return false; | 438 return false; |
| 438 | 439 |
| 439 *out_num_bytes_of_contraction = num_bytes_of_contraction; | 440 *out_num_bytes_of_contraction = num_bytes_of_contraction; |
| 440 return true; | 441 return true; |
| 441 } | 442 } |
| 442 | 443 |
| 443 // Converts a hex comonent into a number. This cannot fail since the caller has | 444 // Converts a hex component into a number. This cannot fail since the caller has |
| 444 // already verified that each character in the string was a hex digit, and | 445 // already verified that each character in the string was a hex digit, and |
| 445 // that there were no more than 4 characters. | 446 // that there were no more than 4 characters. |
| 446 template<typename CHAR> | 447 template <typename CHAR> |
| 447 uint16 IPv6HexComponentToNumber(const CHAR* spec, const Component& component) { | 448 uint16_t IPv6HexComponentToNumber(const CHAR* spec, |
| 449 const Component& component) { |
| 448 DCHECK(component.len <= 4); | 450 DCHECK(component.len <= 4); |
| 449 | 451 |
| 450 // Copy the hex string into a C-string. | 452 // Copy the hex string into a C-string. |
| 451 char buf[5]; | 453 char buf[5]; |
| 452 for (int i = 0; i < component.len; ++i) | 454 for (int i = 0; i < component.len; ++i) |
| 453 buf[i] = static_cast<char>(spec[component.begin + i]); | 455 buf[i] = static_cast<char>(spec[component.begin + i]); |
| 454 buf[component.len] = '\0'; | 456 buf[component.len] = '\0'; |
| 455 | 457 |
| 456 // Convert it to a number (overflow is not possible, since with 4 hex | 458 // Convert it to a number (overflow is not possible, since with 4 hex |
| 457 // characters we can at most have a 16 bit number). | 459 // characters we can at most have a 16 bit number). |
| 458 return static_cast<uint16>(_strtoui64(buf, NULL, 16)); | 460 return static_cast<uint16_t>(_strtoui64(buf, NULL, 16)); |
| 459 } | 461 } |
| 460 | 462 |
| 461 // Converts an IPv6 address to a 128-bit number (network byte order), returning | 463 // Converts an IPv6 address to a 128-bit number (network byte order), returning |
| 462 // true on success. False means that the input was not a valid IPv6 address. | 464 // true on success. False means that the input was not a valid IPv6 address. |
| 463 template<typename CHAR, typename UCHAR> | 465 template<typename CHAR, typename UCHAR> |
| 464 bool DoIPv6AddressToNumber(const CHAR* spec, | 466 bool DoIPv6AddressToNumber(const CHAR* spec, |
| 465 const Component& host, | 467 const Component& host, |
| 466 unsigned char address[16]) { | 468 unsigned char address[16]) { |
| 467 // Make sure the component is bounded by '[' and ']'. | 469 // Make sure the component is bounded by '[' and ']'. |
| 468 int end = host.end(); | 470 int end = host.end(); |
| (...skipping 21 matching lines...) Expand all Loading... |
| 490 // Loop through each hex components, and contraction in order. | 492 // Loop through each hex components, and contraction in order. |
| 491 for (int i = 0; i <= ipv6_parsed.num_hex_components; ++i) { | 493 for (int i = 0; i <= ipv6_parsed.num_hex_components; ++i) { |
| 492 // Append the contraction if it appears before this component. | 494 // Append the contraction if it appears before this component. |
| 493 if (i == ipv6_parsed.index_of_contraction) { | 495 if (i == ipv6_parsed.index_of_contraction) { |
| 494 for (int j = 0; j < num_bytes_of_contraction; ++j) | 496 for (int j = 0; j < num_bytes_of_contraction; ++j) |
| 495 address[cur_index_in_address++] = 0; | 497 address[cur_index_in_address++] = 0; |
| 496 } | 498 } |
| 497 // Append the hex component's value. | 499 // Append the hex component's value. |
| 498 if (i != ipv6_parsed.num_hex_components) { | 500 if (i != ipv6_parsed.num_hex_components) { |
| 499 // Get the 16-bit value for this hex component. | 501 // Get the 16-bit value for this hex component. |
| 500 uint16 number = IPv6HexComponentToNumber<CHAR>( | 502 uint16_t number = IPv6HexComponentToNumber<CHAR>( |
| 501 spec, ipv6_parsed.hex_components[i]); | 503 spec, ipv6_parsed.hex_components[i]); |
| 502 // Append to |address|, in network byte order. | 504 // Append to |address|, in network byte order. |
| 503 address[cur_index_in_address++] = (number & 0xFF00) >> 8; | 505 address[cur_index_in_address++] = (number & 0xFF00) >> 8; |
| 504 address[cur_index_in_address++] = (number & 0x00FF); | 506 address[cur_index_in_address++] = (number & 0x00FF); |
| 505 } | 507 } |
| 506 } | 508 } |
| 507 | 509 |
| 508 // If there was an IPv4 section, convert it into a 32-bit number and append | 510 // If there was an IPv4 section, convert it into a 32-bit number and append |
| 509 // it to |address|. | 511 // it to |address|. |
| 510 if (ipv6_parsed.ipv4_component.is_valid()) { | 512 if (ipv6_parsed.ipv4_component.is_valid()) { |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 569 for (int i = host.begin; i < host.end(); i++) { | 571 for (int i = host.begin; i < host.end(); i++) { |
| 570 switch (spec[i]) { | 572 switch (spec[i]) { |
| 571 case '[': | 573 case '[': |
| 572 case ']': | 574 case ']': |
| 573 case ':': | 575 case ':': |
| 574 host_info->family = CanonHostInfo::BROKEN; | 576 host_info->family = CanonHostInfo::BROKEN; |
| 575 return true; | 577 return true; |
| 576 } | 578 } |
| 577 } | 579 } |
| 578 | 580 |
| 579 // No invalid characters. Could still be IPv4 or a hostname. | 581 // No invalid characters. Could still be IPv4 or a hostname. |
| 580 host_info->family = CanonHostInfo::NEUTRAL; | 582 host_info->family = CanonHostInfo::NEUTRAL; |
| 581 return false; | 583 return false; |
| 582 } | 584 } |
| 583 | 585 |
| 584 host_info->out_host.begin = output->length(); | 586 host_info->out_host.begin = output->length(); |
| 585 output->push_back('['); | 587 output->push_back('['); |
| 586 AppendIPv6Address(host_info->address, output); | 588 AppendIPv6Address(host_info->address, output); |
| 587 output->push_back(']'); | 589 output->push_back(']'); |
| 588 host_info->out_host.len = output->length() - host_info->out_host.begin; | 590 host_info->out_host.len = output->length() - host_info->out_host.begin; |
| 589 | 591 |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 700 return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address); | 702 return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address); |
| 701 } | 703 } |
| 702 | 704 |
| 703 bool IPv6AddressToNumber(const base::char16* spec, | 705 bool IPv6AddressToNumber(const base::char16* spec, |
| 704 const Component& host, | 706 const Component& host, |
| 705 unsigned char address[16]) { | 707 unsigned char address[16]) { |
| 706 return DoIPv6AddressToNumber<base::char16, base::char16>(spec, host, address); | 708 return DoIPv6AddressToNumber<base::char16, base::char16>(spec, host, address); |
| 707 } | 709 } |
| 708 | 710 |
| 709 } // namespace url | 711 } // namespace url |
| OLD | NEW |