url_canon_ip.cc - Issue 2029803003: Update to Chromium //url at Chromium commit 79dc59ac7602413181079ecb463873e29a1d7d0a.

Side by Side Diff: url_canon_ip.cc

Issue 2029803003: Update to Chromium //url at Chromium commit 79dc59ac7602413181079ecb463873e29a1d7d0a. (Closed) Base URL: https://chromium.googlesource.com/external/github.com/domokit/gurl@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "url/url_canon_ip.h"	5 #include "url/url_canon_ip.h"

6	6

	7 #include <stdint.h>

7 #include <stdlib.h>	8 #include <stdlib.h>

	9 #include <limits>

8	10

9 #include "base/basictypes.h"

10 #include "base/logging.h"	11 #include "base/logging.h"

11 #include "url/url_canon_internal.h"	12 #include "url/url_canon_internal.h"

12	13

13 namespace url {	14 namespace url {

14	15

15 namespace {	16 namespace {

16	17

17 // Converts one of the character types that represent a numerical base to the	18 // Converts one of the character types that represent a numerical base to the

18 // corresponding base.	19 // corresponding base.

19 int BaseForType(SharedCharTypes type) {	20 int BaseForType(SharedCharTypes type) {

(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
85 // - IPV4 - The number was valid, and did not overflow.	86 // - IPV4 - The number was valid, and did not overflow.

86 // - BROKEN - The input was numeric, but too large for a 32-bit field.	87 // - BROKEN - The input was numeric, but too large for a 32-bit field.

87 // - NEUTRAL - Input was not numeric.	88 // - NEUTRAL - Input was not numeric.

88 //	89 //

89 // The input is assumed to be ASCII. FindIPv4Components should have stripped	90 // The input is assumed to be ASCII. FindIPv4Components should have stripped

90 // out any input that is greater than 7 bits. The components are assumed	91 // out any input that is greater than 7 bits. The components are assumed

91 // to be non-empty.	92 // to be non-empty.

92 template<typename CHAR>	93 template<typename CHAR>

93 CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,	94 CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,

94 const Component& component,	95 const Component& component,

95 uint32* number) {	96 uint32_t* number) {

96 // Figure out the base	97 // Figure out the base

97 SharedCharTypes base;	98 SharedCharTypes base;

98 int base_prefix_len = 0; // Size of the prefix for this base.	99 int base_prefix_len = 0; // Size of the prefix for this base.

99 if (spec[component.begin] == '0') {	100 if (spec[component.begin] == '0') {

100 // Either hex or dec, or a standalone zero.	101 // Either hex or dec, or a standalone zero.

101 if (component.len == 1) {	102 if (component.len == 1) {

102 base = CHAR_DEC;	103 base = CHAR_DEC;

103 } else if (spec[component.begin + 1] == 'X' ||	104 } else if (spec[component.begin + 1] == 'X' ||

104 spec[component.begin + 1] == 'x') {	105 spec[component.begin + 1] == 'x') {

105 base = CHAR_HEX;	106 base = CHAR_HEX;

106 base_prefix_len = 2;	107 base_prefix_len = 2;

107 } else {	108 } else {

108 base = CHAR_OCT;	109 base = CHAR_OCT;

109 base_prefix_len = 1;	110 base_prefix_len = 1;

110 }	111 }

111 } else {	112 } else {

112 base = CHAR_DEC;	113 base = CHAR_DEC;

113 }	114 }

114	115

115 // Extend the prefix to consume all leading zeros.	116 // Extend the prefix to consume all leading zeros.

116 while (base_prefix_len < component.len &&	117 while (base_prefix_len < component.len &&

117 spec[component.begin + base_prefix_len] == '0')	118 spec[component.begin + base_prefix_len] == '0')

118 base_prefix_len++;	119 base_prefix_len++;

119	120

120 // Put the component, minus any base prefix, into a NULL-terminated buffer so	121 // Put the component, minus any base prefix, into a NULL-terminated buffer so

121 // we can call the standard library. Because leading zeros have already been	122 // we can call the standard library. Because leading zeros have already been

122 // discarded, filling the entire buffer is guaranteed to trigger the 32-bit	123 // discarded, filling the entire buffer is guaranteed to trigger the 32-bit

123 // overflow check.	124 // overflow check.

124 const int kMaxComponentLen = 16;	125 const int kMaxComponentLen = 16;

125 char buf[kMaxComponentLen + 1]; // digits + '\0'	126 char buf[kMaxComponentLen + 1]; // digits + '\0'

126 int dest_i = 0;	127 int dest_i = 0;

127 for (int i = component.begin + base_prefix_len; i < component.end(); i++) {	128 for (int i = component.begin + base_prefix_len; i < component.end(); i++) {

128 // We know the input is 7-bit, so convert to narrow (if this is the wide	129 // We know the input is 7-bit, so convert to narrow (if this is the wide

129 // version of the template) by casting.	130 // version of the template) by casting.

130 char input = static_cast<char>(spec[i]);	131 char input = static_cast<char>(spec[i]);

131	132

132 // Validate that this character is OK for the given base.	133 // Validate that this character is OK for the given base.

133 if (!IsCharOfType(input, base))	134 if (!IsCharOfType(input, base))

134 return CanonHostInfo::NEUTRAL;	135 return CanonHostInfo::NEUTRAL;

135	136

136 // Fill the buffer, if there's space remaining. This check allows us to	137 // Fill the buffer, if there's space remaining. This check allows us to

137 // verify that all characters are numeric, even those that don't fit.	138 // verify that all characters are numeric, even those that don't fit.

138 if (dest_i < kMaxComponentLen)	139 if (dest_i < kMaxComponentLen)

139 buf[dest_i++] = input;	140 buf[dest_i++] = input;

140 }	141 }

141	142

142 buf[dest_i] = '\0';	143 buf[dest_i] = '\0';

143	144

144 // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal	145 // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal

145 // number can overflow a 64-bit number in <= 16 characters).	146 // number can overflow a 64-bit number in <= 16 characters).

146 uint64 num = _strtoui64(buf, NULL, BaseForType(base));	147 uint64_t num = _strtoui64(buf, NULL, BaseForType(base));

147	148

148 // Check for 32-bit overflow.	149 // Check for 32-bit overflow.

149 if (num > kuint32max)	150 if (num > std::numeric_limits<uint32_t>::max())

150 return CanonHostInfo::BROKEN;	151 return CanonHostInfo::BROKEN;

151	152

152 // No overflow. Success!	153 // No overflow. Success!

153 *number = static_cast<uint32>(num);	154 *number = static_cast<uint32_t>(num);

154 return CanonHostInfo::IPV4;	155 return CanonHostInfo::IPV4;

155 }	156 }

156	157

157 // See declaration of IPv4AddressToNumber for documentation.	158 // See declaration of IPv4AddressToNumber for documentation.

158 template<typename CHAR>	159 template<typename CHAR>

159 CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec,	160 CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec,

160 const Component& host,	161 const Component& host,

161 unsigned char address[4],	162 unsigned char address[4],

162 int* num_ipv4_components) {	163 int* num_ipv4_components) {

163 // The identified components. Not all may exist.	164 // The identified components. Not all may exist.

164 Component components[4];	165 Component components[4];

165 if (!FindIPv4Components(spec, host, components))	166 if (!FindIPv4Components(spec, host, components))

166 return CanonHostInfo::NEUTRAL;	167 return CanonHostInfo::NEUTRAL;

167	168

168 // Convert existing components to digits. Values up to	169 // Convert existing components to digits. Values up to

169 // |existing_components| will be valid.	170 // |existing_components| will be valid.

170 uint32 component_values[4];	171 uint32_t component_values[4];

171 int existing_components = 0;	172 int existing_components = 0;

172	173

173 // Set to true if one or more components are BROKEN. BROKEN is only	174 // Set to true if one or more components are BROKEN. BROKEN is only

174 // returned if all components are IPV4 or BROKEN, so, for example,	175 // returned if all components are IPV4 or BROKEN, so, for example,

175 // 12345678912345.de returns NEUTRAL rather than broken.	176 // 12345678912345.de returns NEUTRAL rather than broken.

176 bool broken = false;	177 bool broken = false;

177 for (int i = 0; i < 4; i++) {	178 for (int i = 0; i < 4; i++) {

178 if (components[i].len <= 0)	179 if (components[i].len <= 0)

179 continue;	180 continue;

180 CanonHostInfo::Family family = IPv4ComponentToNumber(	181 CanonHostInfo::Family family = IPv4ComponentToNumber(

181 spec, components[i], &component_values[existing_components]);	182 spec, components[i], &component_values[existing_components]);

182	183

183 if (family == CanonHostInfo::BROKEN) {	184 if (family == CanonHostInfo::BROKEN) {

184 broken = true;	185 broken = true;

185 } else if (family != CanonHostInfo::IPV4) {	186 } else if (family != CanonHostInfo::IPV4) {

186 // Stop if we hit a non-BROKEN invalid non-empty component.	187 // Stop if we hit a non-BROKEN invalid non-empty component.

187 return family;	188 return family;

188 }	189 }

189	190

190 existing_components++;	191 existing_components++;

191 }	192 }

192	193

193 if (broken)	194 if (broken)

194 return CanonHostInfo::BROKEN;	195 return CanonHostInfo::BROKEN;

195	196

196 // Use that sequence of numbers to fill out the 4-component IP address.	197 // Use that sequence of numbers to fill out the 4-component IP address.

197	198

198 // First, process all components but the last, while making sure each fits	199 // First, process all components but the last, while making sure each fits

199 // within an 8-bit field.	200 // within an 8-bit field.

200 for (int i = 0; i < existing_components - 1; i++) {	201 for (int i = 0; i < existing_components - 1; i++) {

201 if (component_values[i] > kuint8max)	202 if (component_values[i] > std::numeric_limits<uint8_t>::max())

202 return CanonHostInfo::BROKEN;	203 return CanonHostInfo::BROKEN;

203 address[i] = static_cast<unsigned char>(component_values[i]);	204 address[i] = static_cast<unsigned char>(component_values[i]);

204 }	205 }

205	206

206 // Next, consume the last component to fill in the remaining bytes.	207 // Next, consume the last component to fill in the remaining bytes.

207 // Work around a gcc 4.9 bug. crbug.com/392872	208 // Work around a gcc 4.9 bug. crbug.com/392872

208 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)	209 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)

209 #pragma GCC diagnostic push	210 #pragma GCC diagnostic push

210 #pragma GCC diagnostic ignored "-Warray-bounds"	211 #pragma GCC diagnostic ignored "-Warray-bounds"

211 #endif	212 #endif

212 uint32 last_value = component_values[existing_components - 1];	213 uint32_t last_value = component_values[existing_components - 1];

213 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)	214 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)

214 #pragma GCC diagnostic pop	215 #pragma GCC diagnostic pop

215 #endif	216 #endif

216 for (int i = 3; i >= existing_components - 1; i--) {	217 for (int i = 3; i >= existing_components - 1; i--) {

217 address[i] = static_cast<unsigned char>(last_value);	218 address[i] = static_cast<unsigned char>(last_value);

218 last_value >>= 8;	219 last_value >>= 8;

219 }	220 }

220	221

221 // If the last component has residual bits, report overflow.	222 // If the last component has residual bits, report overflow.

222 if (last_value != 0)	223 if (last_value != 0)

(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
433 }	434 }

434	435

435 // Check that the numbers add up.	436 // Check that the numbers add up.

436 if (num_bytes_without_contraction + num_bytes_of_contraction != 16)	437 if (num_bytes_without_contraction + num_bytes_of_contraction != 16)

437 return false;	438 return false;

438	439

439 *out_num_bytes_of_contraction = num_bytes_of_contraction;	440 *out_num_bytes_of_contraction = num_bytes_of_contraction;

440 return true;	441 return true;

441 }	442 }

442	443

443 // Converts a hex comonent into a number. This cannot fail since the caller has	444 // Converts a hex component into a number. This cannot fail since the caller has

444 // already verified that each character in the string was a hex digit, and	445 // already verified that each character in the string was a hex digit, and

445 // that there were no more than 4 characters.	446 // that there were no more than 4 characters.

446 template<typename CHAR>	447 template <typename CHAR>

447 uint16 IPv6HexComponentToNumber(const CHAR* spec, const Component& component) {	448 uint16_t IPv6HexComponentToNumber(const CHAR* spec,

	449 const Component& component) {

448 DCHECK(component.len <= 4);	450 DCHECK(component.len <= 4);

449	451

450 // Copy the hex string into a C-string.	452 // Copy the hex string into a C-string.

451 char buf[5];	453 char buf[5];

452 for (int i = 0; i < component.len; ++i)	454 for (int i = 0; i < component.len; ++i)

453 buf[i] = static_cast<char>(spec[component.begin + i]);	455 buf[i] = static_cast<char>(spec[component.begin + i]);

454 buf[component.len] = '\0';	456 buf[component.len] = '\0';

455	457

456 // Convert it to a number (overflow is not possible, since with 4 hex	458 // Convert it to a number (overflow is not possible, since with 4 hex

457 // characters we can at most have a 16 bit number).	459 // characters we can at most have a 16 bit number).

458 return static_cast<uint16>(_strtoui64(buf, NULL, 16));	460 return static_cast<uint16_t>(_strtoui64(buf, NULL, 16));

459 }	461 }

460	462

461 // Converts an IPv6 address to a 128-bit number (network byte order), returning	463 // Converts an IPv6 address to a 128-bit number (network byte order), returning

462 // true on success. False means that the input was not a valid IPv6 address.	464 // true on success. False means that the input was not a valid IPv6 address.

463 template<typename CHAR, typename UCHAR>	465 template<typename CHAR, typename UCHAR>

464 bool DoIPv6AddressToNumber(const CHAR* spec,	466 bool DoIPv6AddressToNumber(const CHAR* spec,

465 const Component& host,	467 const Component& host,

466 unsigned char address[16]) {	468 unsigned char address[16]) {

467 // Make sure the component is bounded by '[' and ']'.	469 // Make sure the component is bounded by '[' and ']'.

468 int end = host.end();	470 int end = host.end();

(...skipping 21 matching lines...) Expand all Loading...
490 // Loop through each hex components, and contraction in order.	492 // Loop through each hex components, and contraction in order.

491 for (int i = 0; i <= ipv6_parsed.num_hex_components; ++i) {	493 for (int i = 0; i <= ipv6_parsed.num_hex_components; ++i) {

492 // Append the contraction if it appears before this component.	494 // Append the contraction if it appears before this component.

493 if (i == ipv6_parsed.index_of_contraction) {	495 if (i == ipv6_parsed.index_of_contraction) {

494 for (int j = 0; j < num_bytes_of_contraction; ++j)	496 for (int j = 0; j < num_bytes_of_contraction; ++j)

495 address[cur_index_in_address++] = 0;	497 address[cur_index_in_address++] = 0;

496 }	498 }

497 // Append the hex component's value.	499 // Append the hex component's value.

498 if (i != ipv6_parsed.num_hex_components) {	500 if (i != ipv6_parsed.num_hex_components) {

499 // Get the 16-bit value for this hex component.	501 // Get the 16-bit value for this hex component.

500 uint16 number = IPv6HexComponentToNumber<CHAR>(	502 uint16_t number = IPv6HexComponentToNumber<CHAR>(

501 spec, ipv6_parsed.hex_components[i]);	503 spec, ipv6_parsed.hex_components[i]);

502 // Append to |address|, in network byte order.	504 // Append to |address|, in network byte order.

503 address[cur_index_in_address++] = (number & 0xFF00) >> 8;	505 address[cur_index_in_address++] = (number & 0xFF00) >> 8;

504 address[cur_index_in_address++] = (number & 0x00FF);	506 address[cur_index_in_address++] = (number & 0x00FF);

505 }	507 }

506 }	508 }

507	509

508 // If there was an IPv4 section, convert it into a 32-bit number and append	510 // If there was an IPv4 section, convert it into a 32-bit number and append

509 // it to |address|.	511 // it to |address|.

510 if (ipv6_parsed.ipv4_component.is_valid()) {	512 if (ipv6_parsed.ipv4_component.is_valid()) {

(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
569 for (int i = host.begin; i < host.end(); i++) {	571 for (int i = host.begin; i < host.end(); i++) {

570 switch (spec[i]) {	572 switch (spec[i]) {

571 case '[':	573 case '[':

572 case ']':	574 case ']':

573 case ':':	575 case ':':

574 host_info->family = CanonHostInfo::BROKEN;	576 host_info->family = CanonHostInfo::BROKEN;

575 return true;	577 return true;

576 }	578 }

577 }	579 }

578	580

579 // No invalid characters. Could still be IPv4 or a hostname.	581 // No invalid characters. Could still be IPv4 or a hostname.

580 host_info->family = CanonHostInfo::NEUTRAL;	582 host_info->family = CanonHostInfo::NEUTRAL;

581 return false;	583 return false;

582 }	584 }

583	585

584 host_info->out_host.begin = output->length();	586 host_info->out_host.begin = output->length();

585 output->push_back('[');	587 output->push_back('[');

586 AppendIPv6Address(host_info->address, output);	588 AppendIPv6Address(host_info->address, output);

587 output->push_back(']');	589 output->push_back(']');

588 host_info->out_host.len = output->length() - host_info->out_host.begin;	590 host_info->out_host.len = output->length() - host_info->out_host.begin;

589	591

(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
700 return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address);	702 return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address);

701 }	703 }

702	704

703 bool IPv6AddressToNumber(const base::char16* spec,	705 bool IPv6AddressToNumber(const base::char16* spec,

704 const Component& host,	706 const Component& host,

705 unsigned char address[16]) {	707 unsigned char address[16]) {

706 return DoIPv6AddressToNumber<base::char16, base::char16>(spec, host, address);	708 return DoIPv6AddressToNumber<base::char16, base::char16>(spec, host, address);

707 }	709 }

708	710

709 } // namespace url	711 } // namespace url

OLD	NEW

« no previous file with comments | « url_canon_ip.h ('k') | url_canon_mailtourl.cc » ('j') | no next file with comments »