url/url_parse.cc - Issue 20349002: Stop pulling googleurl through DEPS.

Side by Side Diff: url/url_parse.cc

Issue 20349002: Stop pulling googleurl through DEPS. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: fix android? Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 /* Based on nsURLParsers.cc from Mozilla

2 * -------------------------------------

3 * The contents of this file are subject to the Mozilla Public License Version

4 * 1.1 (the "License"); you may not use this file except in compliance with

5 * the License. You may obtain a copy of the License at

6 * http://www.mozilla.org/MPL/

7 *

8 * Software distributed under the License is distributed on an "AS IS" basis,

9 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License

10 * for the specific language governing rights and limitations under the

11 * License.

12 *

13 * The Original Code is mozilla.org code.

14 *

15 * The Initial Developer of the Original Code is

16 * Netscape Communications Corporation.

17 * Portions created by the Initial Developer are Copyright (C) 1998

18 * the Initial Developer. All Rights Reserved.

19 *

20 * Contributor(s):

21 * Darin Fisher (original author)

22 *

23 * Alternatively, the contents of this file may be used under the terms of

24 * either the GNU General Public License Version 2 or later (the "GPL"), or

25 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),

26 * in which case the provisions of the GPL or the LGPL are applicable instead

27 * of those above. If you wish to allow use of your version of this file only

28 * under the terms of either the GPL or the LGPL, and not to allow others to

29 * use your version of this file under the terms of the MPL, indicate your

30 * decision by deleting the provisions above and replace them with the notice

31 * and other provisions required by the GPL or the LGPL. If you do not delete

32 * the provisions above, a recipient may use your version of this file under

33 * the terms of any one of the MPL, the GPL or the LGPL.

34 *

35 * *** END LICENSE BLOCK *** */

36

37 #include "url/url_parse.h"

38

39 #include <stdlib.h>

40

41 #include "base/logging.h"

42 #include "url/url_parse_internal.h"

43 #include "url/url_util.h"

44 #include "url/url_util_internal.h"

45

46 namespace url_parse {

47

48 namespace {

49

50 // Returns true if the given character is a valid digit to use in a port.

51 inline bool IsPortDigit(base::char16 ch) {

52 return ch >= '0' && ch <= '9';

53 }

54

// Scans |spec| forward from |start_offset| and yields the index of the first
// character for which IsAuthorityTerminator() returns true. When no such
// character exists before |spec_len|, |spec_len| itself is returned.
template<typename CHAR>
int FindNextAuthorityTerminator(const CHAR* spec,
                                int start_offset,
                                int spec_len) {
  int pos = start_offset;
  while (pos < spec_len && !IsAuthorityTerminator(spec[pos]))
    ++pos;

  // |pos| either rests on a terminator or has reached/passed the end of the
  // input; in the latter case report "not found" as |spec_len|.
  return pos < spec_len ? pos : spec_len;
}

68

69 template<typename CHAR>

70 void ParseUserInfo(const CHAR* spec,

71 const Component& user,

72 Component* username,

73 Component* password) {

74 // Find the first colon in the user section, which separates the username and

75 // password.

76 int colon_offset = 0;

77 while (colon_offset < user.len && spec[user.begin + colon_offset] != ':')

78 colon_offset++;

79

80 if (colon_offset < user.len) {

81 // Found separator: <username>:<password>

82 *username = Component(user.begin, colon_offset);

83 *password = MakeRange(user.begin + colon_offset + 1,

84 user.begin + user.len);

85 } else {

86 // No separator, treat everything as the username

87 *username = user;

88 *password = Component();

89 }

90 }

91

92 template<typename CHAR>

93 void ParseServerInfo(const CHAR* spec,

94 const Component& serverinfo,

95 Component* hostname,

96 Component* port_num) {

97 if (serverinfo.len == 0) {

98 // No server info, host name is empty.

99 hostname->reset();

100 port_num->reset();

101 return;

102 }

103

104 // If the host starts with a left-bracket, assume the entire host is an

105 // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal.

106 // This assumption will be overridden if we find a right-bracket.

107 //

108 // Our IPv6 address canonicalization code requires both brackets to exist,

109 // but the ability to locate an incomplete address can still be useful.

110 int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1;

111 int colon = -1;

112

113 // Find the last right-bracket, and the last colon.

114 for (int i = serverinfo.begin; i < serverinfo.end(); i++) {

115 switch (spec[i]) {

116 case ']':

117 ipv6_terminator = i;

118 break;

119 case ':':

120 colon = i;

121 break;

122 }

123 }

124

125 if (colon > ipv6_terminator) {

126 // Found a port number: <hostname>:<port>

127 *hostname = MakeRange(serverinfo.begin, colon);

128 if (hostname->len == 0)

129 hostname->reset();

130 *port_num = MakeRange(colon + 1, serverinfo.end());

131 } else {

132 // No port: <hostname>

133 *hostname = serverinfo;

134 port_num->reset();

135 }

136 }

137

138 // Given an already-identified auth section, breaks it into its consituent

139 // parts. The port number will be parsed and the resulting integer will be

140 // filled into the given *port variable, or -1 if there is no port number or it

141 // is invalid.

142 template<typename CHAR>

143 void DoParseAuthority(const CHAR* spec,

144 const Component& auth,

145 Component* username,

146 Component* password,

147 Component* hostname,

148 Component* port_num) {

149 DCHECK(auth.is_valid()) << "We should always get an authority";

150 if (auth.len == 0) {

151 username->reset();

152 password->reset();

153 hostname->reset();

154 port_num->reset();

155 return;

156 }

157

158 // Search backwards for @, which is the separator between the user info and

159 // the server info.

160 int i = auth.begin + auth.len - 1;

161 while (i > auth.begin && spec[i] != '@')

162 i--;

163

164 if (spec[i] == '@') {

165 // Found user info: <user-info>@<server-info>

166 ParseUserInfo(spec, Component(auth.begin, i - auth.begin),

167 username, password);

168 ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len),

169 hostname, port_num);

170 } else {

171 // No user info, everything is server info.

172 username->reset();

173 password->reset();

174 ParseServerInfo(spec, auth, hostname, port_num);

175 }

176 }

177

178 template<typename CHAR>

179 void ParsePath(const CHAR* spec,

180 const Component& path,

181 Component* filepath,

182 Component* query,

183 Component* ref) {

184 // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>

185

186 // Special case when there is no path.

187 if (path.len == -1) {

188 filepath->reset();

189 query->reset();

190 ref->reset();

191 return;

192 }

193 DCHECK(path.len > 0) << "We should never have 0 length paths";

194

195 // Search for first occurrence of either ? or #.

196 int path_end = path.begin + path.len;

197

198 int query_separator = -1; // Index of the '?'

199 int ref_separator = -1; // Index of the '#'

200 for (int i = path.begin; i < path_end; i++) {

201 switch (spec[i]) {

202 case '?':

203 // Only match the query string if it precedes the reference fragment

204 // and when we haven't found one already.

205 if (ref_separator < 0 && query_separator < 0)

206 query_separator = i;

207 break;

208 case '#':

209 // Record the first # sign only.

210 if (ref_separator < 0)

211 ref_separator = i;

212 break;

213 }

214 }

215

216 // Markers pointing to the character after each of these corresponding

217 // components. The code below words from the end back to the beginning,

218 // and will update these indices as it finds components that exist.

219 int file_end, query_end;

220

221 // Ref fragment: from the # to the end of the path.

222 if (ref_separator >= 0) {

223 file_end = query_end = ref_separator;

224 *ref = MakeRange(ref_separator + 1, path_end);

225 } else {

226 file_end = query_end = path_end;

227 ref->reset();

228 }

229

230 // Query fragment: everything from the ? to the next boundary (either the end

231 // of the path or the ref fragment).

232 if (query_separator >= 0) {

233 file_end = query_separator;

234 *query = MakeRange(query_separator + 1, query_end);

235 } else {

236 query->reset();

237 }

238

239 // File path: treat an empty file path as no file path.

240 if (file_end != path.begin)

241 *filepath = MakeRange(path.begin, file_end);

242 else

243 filepath->reset();

244 }

245

246 template<typename CHAR>

247 bool DoExtractScheme(const CHAR* url,

248 int url_len,

249 Component* scheme) {

250 // Skip leading whitespace and control characters.

251 int begin = 0;

252 while (begin < url_len && ShouldTrimFromURL(url[begin]))

253 begin++;

254 if (begin == url_len)

255 return false; // Input is empty or all whitespace.

256

257 // Find the first colon character.

258 for (int i = begin; i < url_len; i++) {

259 if (url[i] == ':') {

260 *scheme = MakeRange(begin, i);

261 return true;

262 }

263 }

264 return false; // No colon found: no scheme

265 }

266

267 // Fills in all members of the Parsed structure except for the scheme.

268 //

269 // \|spec\| is the full spec being parsed, of length \|spec_len\|.

270 // \|after_scheme\| is the character immediately following the scheme (after the

271 // colon) where we'll begin parsing.

272 //

273 // Compatability data points. I list "host", "path" extracted:

274 // Input IE6 Firefox Us

275 // ----- -------------- -------------- --------------

276 // http://foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"

277 // http:foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"

278 // http:/foo.com/ fail(*) "foo.com", "/" "foo.com", "/"

279 // http:\foo.com/ fail(*) "\foo.com", "/"(fail) "foo.com", "/"

280 // http:////foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"

281 //

282 // (*) Interestingly, although IE fails to load these URLs, its history

283 // canonicalizer handles them, meaning if you've been to the corresponding

284 // "http://foo.com/" link, it will be colored.

285 template <typename CHAR>

286 void DoParseAfterScheme(const CHAR* spec,

287 int spec_len,

288 int after_scheme,

289 Parsed* parsed) {

290 int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);

291 int after_slashes = after_scheme + num_slashes;

292

293 // First split into two main parts, the authority (username, password, host,

294 // and port) and the full path (path, query, and reference).

295 Component authority;

296 Component full_path;

297

298 // Found "//<some data>", looks like an authority section. Treat everything

299 // from there to the next slash (or end of spec) to be the authority. Note

300 // that we ignore the number of slashes and treat it as the authority.

301 int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len);

302 authority = Component(after_slashes, end_auth - after_slashes);

303

304 if (end_auth == spec_len) // No beginning of path found.

305 full_path = Component();

306 else // Everything starting from the slash to the end is the path.

307 full_path = Component(end_auth, spec_len - end_auth);

308

309 // Now parse those two sub-parts.

310 DoParseAuthority(spec, authority, &parsed->username, &parsed->password,

311 &parsed->host, &parsed->port);

312 ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);

313 }

314

315 // The main parsing function for standard URLs. Standard URLs have a scheme,

316 // host, path, etc.

317 template<typename CHAR>

318 void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) {

319 DCHECK(spec_len >= 0);

320

321 // Strip leading & trailing spaces and control characters.

322 int begin = 0;

323 TrimURL(spec, &begin, &spec_len);

324

325 int after_scheme;

326 if (DoExtractScheme(spec, spec_len, &parsed->scheme)) {

327 after_scheme = parsed->scheme.end() + 1; // Skip past the colon.

328 } else {

329 // Say there's no scheme when there is no colon. We could also say that

330 // everything is the scheme. Both would produce an invalid URL, but this way

331 // seems less wrong in more cases.

332 parsed->scheme.reset();

333 after_scheme = begin;

334 }

335 DoParseAfterScheme(spec, spec_len, after_scheme, parsed);

336 }

337

338 template<typename CHAR>

339 void DoParseFileSystemURL(const CHAR* spec, int spec_len, Parsed* parsed) {

340 DCHECK(spec_len >= 0);

341

342 // Get the unused parts of the URL out of the way.

343 parsed->username.reset();

344 parsed->password.reset();

345 parsed->host.reset();

346 parsed->port.reset();

347 parsed->path.reset(); // May use this; reset for convenience.

348 parsed->ref.reset(); // May use this; reset for convenience.

349 parsed->query.reset(); // May use this; reset for convenience.

350 parsed->clear_inner_parsed(); // May use this; reset for convenience.

351

352 // Strip leading & trailing spaces and control characters.

353 int begin = 0;

354 TrimURL(spec, &begin, &spec_len);

355

356 // Handle empty specs or ones that contain only whitespace or control chars.

357 if (begin == spec_len) {

358 parsed->scheme.reset();

359 return;

360 }

361

362 int inner_start = -1;

363

364 // Extract the scheme. We also handle the case where there is no scheme.

365 if (DoExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {

366 // Offset the results since we gave ExtractScheme a substring.

367 parsed->scheme.begin += begin;

368

369 if (parsed->scheme.end() == spec_len - 1)

370 return;

371

372 inner_start = parsed->scheme.end() + 1;

373 } else {

374 // No scheme found; that's not valid for filesystem URLs.

375 parsed->scheme.reset();

376 return;

377 }

378

379 url_parse::Component inner_scheme;

380 const CHAR* inner_spec = &spec[inner_start];

381 int inner_spec_len = spec_len - inner_start;

382

383 if (DoExtractScheme(inner_spec, inner_spec_len, &inner_scheme)) {

384 // Offset the results since we gave ExtractScheme a substring.

385 inner_scheme.begin += inner_start;

386

387 if (inner_scheme.end() == spec_len - 1)

388 return;

389 } else {

390 // No scheme found; that's not valid for filesystem URLs.

391 // The best we can do is return "filesystem://".

392 return;

393 }

394

395 Parsed inner_parsed;

396

397 if (url_util::CompareSchemeComponent(

398 spec, inner_scheme, url_util::kFileScheme)) {

399 // File URLs are special.

400 ParseFileURL(inner_spec, inner_spec_len, &inner_parsed);

401 } else if (url_util::CompareSchemeComponent(spec, inner_scheme,

402 url_util::kFileSystemScheme)) {

403 // Filesystem URLs don't nest.

404 return;

405 } else if (url_util::IsStandard(spec, inner_scheme)) {

406 // All "normal" URLs.

407 DoParseStandardURL(inner_spec, inner_spec_len, &inner_parsed);

408 } else {

409 return;

410 }

411

412 // All members of inner_parsed need to be offset by inner_start.

413 // If we had any scheme that supported nesting more than one level deep,

414 // we'd have to recurse into the inner_parsed's inner_parsed when

415 // adjusting by inner_start.

416 inner_parsed.scheme.begin += inner_start;

417 inner_parsed.username.begin += inner_start;

418 inner_parsed.password.begin += inner_start;

419 inner_parsed.host.begin += inner_start;

420 inner_parsed.port.begin += inner_start;

421 inner_parsed.query.begin += inner_start;

422 inner_parsed.ref.begin += inner_start;

423 inner_parsed.path.begin += inner_start;

424

425 // Query and ref move from inner_parsed to parsed.

426 parsed->query = inner_parsed.query;

427 inner_parsed.query.reset();

428 parsed->ref = inner_parsed.ref;

429 inner_parsed.ref.reset();

430

431 parsed->set_inner_parsed(inner_parsed);

432 if (!inner_parsed.scheme.is_valid() \|\| !inner_parsed.path.is_valid() \|\|

433 inner_parsed.inner_parsed()) {

434 return;

435 }

436

437 // The path in inner_parsed should start with a slash, then have a filesystem

438 // type followed by a slash. From the first slash up to but excluding the

439 // second should be what it keeps; the rest goes to parsed. If the path ends

440 // before the second slash, it's still pretty clear what the user meant, so

441 // we'll let that through.

442 if (!IsURLSlash(spec[inner_parsed.path.begin])) {

443 return;

444 }

445 int inner_path_end = inner_parsed.path.begin + 1; // skip the leading slash

446 while (inner_path_end < spec_len &&

447 !IsURLSlash(spec[inner_path_end]))

448 ++inner_path_end;

449 parsed->path.begin = inner_path_end;

450 int new_inner_path_length = inner_path_end - inner_parsed.path.begin;

451 parsed->path.len = inner_parsed.path.len - new_inner_path_length;

452 parsed->inner_parsed()->path.len = new_inner_path_length;

453 }

454

455 // Initializes a path URL which is merely a scheme followed by a path. Examples

456 // include "about:foo" and "javascript:alert('bar');"

457 template<typename CHAR>

458 void DoParsePathURL(const CHAR* spec, int spec_len, Parsed* parsed) {

459 // Get the non-path and non-scheme parts of the URL out of the way, we never

460 // use them.

461 parsed->username.reset();

462 parsed->password.reset();

463 parsed->host.reset();

464 parsed->port.reset();

465 parsed->query.reset();

466 parsed->ref.reset();

467

468 // Strip leading & trailing spaces and control characters.

469 int begin = 0;

470 TrimURL(spec, &begin, &spec_len);

471

472 // Handle empty specs or ones that contain only whitespace or control chars.

473 if (begin == spec_len) {

474 parsed->scheme.reset();

475 parsed->path.reset();

476 return;

477 }

478

479 // Extract the scheme, with the path being everything following. We also

480 // handle the case where there is no scheme.

481 if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {

482 // Offset the results since we gave ExtractScheme a substring.

483 parsed->scheme.begin += begin;

484

485 // For compatability with the standard URL parser, we treat no path as

486 // -1, rather than having a length of 0 (we normally wouldn't care so

487 // much for these non-standard URLs).

488 if (parsed->scheme.end() == spec_len - 1)

489 parsed->path.reset();

490 else

491 parsed->path = MakeRange(parsed->scheme.end() + 1, spec_len);

492 } else {

493 // No scheme found, just path.

494 parsed->scheme.reset();

495 parsed->path = MakeRange(begin, spec_len);

496 }

497 }

498

499 template<typename CHAR>

500 void DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) {

501 DCHECK(spec_len >= 0);

502

503 // Get the non-path and non-scheme parts of the URL out of the way, we never

504 // use them.

505 parsed->username.reset();

506 parsed->password.reset();

507 parsed->host.reset();

508 parsed->port.reset();

509 parsed->ref.reset();

510 parsed->query.reset(); // May use this; reset for convenience.

511

512 // Strip leading & trailing spaces and control characters.

513 int begin = 0;

514 TrimURL(spec, &begin, &spec_len);

515

516 // Handle empty specs or ones that contain only whitespace or control chars.

517 if (begin == spec_len) {

518 parsed->scheme.reset();

519 parsed->path.reset();

520 return;

521 }

522

523 int path_begin = -1;

524 int path_end = -1;

525

526 // Extract the scheme, with the path being everything following. We also

527 // handle the case where there is no scheme.

528 if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {

529 // Offset the results since we gave ExtractScheme a substring.

530 parsed->scheme.begin += begin;

531

532 if (parsed->scheme.end() != spec_len - 1) {

533 path_begin = parsed->scheme.end() + 1;

534 path_end = spec_len;

535 }

536 } else {

537 // No scheme found, just path.

538 parsed->scheme.reset();

539 path_begin = begin;

540 path_end = spec_len;

541 }

542

543 // Split [path_begin, path_end) into a path + query.

544 for (int i = path_begin; i < path_end; ++i) {

545 if (spec[i] == '?') {

546 parsed->query = MakeRange(i + 1, path_end);

547 path_end = i;

548 break;

549 }

550 }

551

552 // For compatability with the standard URL parser, treat no path as

553 // -1, rather than having a length of 0

554 if (path_begin == path_end) {

555 parsed->path.reset();

556 } else {

557 parsed->path = MakeRange(path_begin, path_end);

558 }

559 }

560

561 // Converts a port number in a string to an integer. We'd like to just call

562 // sscanf but our input is not NULL-terminated, which sscanf requires. Instead,

563 // we copy the digits to a small stack buffer (since we know the maximum number

564 // of digits in a valid port number) that we can NULL terminate.

565 template<typename CHAR>

566 int DoParsePort(const CHAR* spec, const Component& component) {

567 // Easy success case when there is no port.

568 const int kMaxDigits = 5;

569 if (!component.is_nonempty())

570 return PORT_UNSPECIFIED;

571

572 // Skip over any leading 0s.

573 Component digits_comp(component.end(), 0);

574 for (int i = 0; i < component.len; i++) {

575 if (spec[component.begin + i] != '0') {

576 digits_comp = MakeRange(component.begin + i, component.end());

577 break;

578 }

579 }

580 if (digits_comp.len == 0)

581 return 0; // All digits were 0.

582

583 // Verify we don't have too many digits (we'll be copying to our buffer so

584 // we need to double-check).

585 if (digits_comp.len > kMaxDigits)

586 return PORT_INVALID;

587

588 // Copy valid digits to the buffer.

589 char digits[kMaxDigits + 1]; // +1 for null terminator

590 for (int i = 0; i < digits_comp.len; i++) {

591 CHAR ch = spec[digits_comp.begin + i];

592 if (!IsPortDigit(ch)) {

593 // Invalid port digit, fail.

594 return PORT_INVALID;

595 }

596 digits[i] = static_cast<char>(ch);

597 }

598

599 // Null-terminate the string and convert to integer. Since we guarantee

600 // only digits, atoi's lack of error handling is OK.

601 digits[digits_comp.len] = 0;

602 int port = atoi(digits);

603 if (port > 65535)

604 return PORT_INVALID; // Out of range.

605 return port;

606 }

607

608 template<typename CHAR>

609 void DoExtractFileName(const CHAR* spec,

610 const Component& path,

611 Component* file_name) {

612 // Handle empty paths: they have no file names.

613 if (!path.is_nonempty()) {

614 file_name->reset();

615 return;

616 }

617

618 // Search backwards for a parameter, which is a normally unused field in a

619 // URL delimited by a semicolon. We parse the parameter as part of the

620 // path, but here, we don't want to count it. The last semicolon is the

621 // parameter. The path should start with a slash, so we don't need to check

622 // the first one.

623 int file_end = path.end();

624 for (int i = path.end() - 1; i > path.begin; i--) {

625 if (spec[i] == ';') {

626 file_end = i;

627 break;

628 }

629 }

630

631 // Now search backwards from the filename end to the previous slash

632 // to find the beginning of the filename.

633 for (int i = file_end - 1; i >= path.begin; i--) {

634 if (IsURLSlash(spec[i])) {

635 // File name is everything following this character to the end

636 *file_name = MakeRange(i + 1, file_end);

637 return;

638 }

639 }

640

641 // No slash found, this means the input was degenerate (generally paths

642 // will start with a slash). Let's call everything the file name.

643 *file_name = MakeRange(path.begin, file_end);

644 return;

645 }

646

647 template<typename CHAR>

648 bool DoExtractQueryKeyValue(const CHAR* spec,

649 Component* query,

650 Component* key,

651 Component* value) {

652 if (!query->is_nonempty())

653 return false;

654

655 int start = query->begin;

656 int cur = start;

657 int end = query->end();

658

659 // We assume the beginning of the input is the beginning of the "key" and we

660 // skip to the end of it.

661 key->begin = cur;

662 while (cur < end && spec[cur] != '&' && spec[cur] != '=')

663 cur++;

664 key->len = cur - key->begin;

665

666 // Skip the separator after the key (if any).

667 if (cur < end && spec[cur] == '=')

668 cur++;

669

670 // Find the value part.

671 value->begin = cur;

672 while (cur < end && spec[cur] != '&')

673 cur++;

674 value->len = cur - value->begin;

675

676 // Finally skip the next separator if any

677 if (cur < end && spec[cur] == '&')

678 cur++;

679

680 // Save the new query

681 *query = url_parse::MakeRange(cur, end);

682 return true;

683 }

684

685 } // namespace

686

687 Parsed::Parsed() : inner_parsed_(NULL) {

688 }

689

690 Parsed::Parsed(const Parsed& other) :

691 scheme(other.scheme),

692 username(other.username),

693 password(other.password),

694 host(other.host),

695 port(other.port),

696 path(other.path),

697 query(other.query),

698 ref(other.ref),

699 inner_parsed_(NULL) {

700 if (other.inner_parsed_)

701 set_inner_parsed(*other.inner_parsed_);

702 }

703

704 Parsed& Parsed::operator=(const Parsed& other) {

705 if (this != &other) {

706 scheme = other.scheme;

707 username = other.username;

708 password = other.password;

709 host = other.host;

710 port = other.port;

711 path = other.path;

712 query = other.query;

713 ref = other.ref;

714 if (other.inner_parsed_)

715 set_inner_parsed(*other.inner_parsed_);

716 else

717 clear_inner_parsed();

718 }

719 return *this;

720 }

721

722 Parsed::~Parsed() {

723 delete inner_parsed_;

724 }

725

726 int Parsed::Length() const {

727 if (ref.is_valid())

728 return ref.end();

729 return CountCharactersBefore(REF, false);

730 }

731

732 int Parsed::CountCharactersBefore(ComponentType type,

733 bool include_delimiter) const {

734 if (type == SCHEME)

735 return scheme.begin;

736

737 // There will be some characters after the scheme like "://" and we don't

738 // know how many. Search forwards for the next thing until we find one.

739 int cur = 0;

740 if (scheme.is_valid())

741 cur = scheme.end() + 1; // Advance over the ':' at the end of the scheme.

742

743 if (username.is_valid()) {

744 if (type <= USERNAME)

745 return username.begin;

746 cur = username.end() + 1; // Advance over the '@' or ':' at the end.

747 }

748

749 if (password.is_valid()) {

750 if (type <= PASSWORD)

751 return password.begin;

752 cur = password.end() + 1; // Advance over the '@' at the end.

753 }

754

755 if (host.is_valid()) {

756 if (type <= HOST)

757 return host.begin;

758 cur = host.end();

759 }

760

761 if (port.is_valid()) {

762 if (type < PORT \|\| (type == PORT && include_delimiter))

763 return port.begin - 1; // Back over delimiter.

764 if (type == PORT)

765 return port.begin; // Don't want delimiter counted.

766 cur = port.end();

767 }

768

769 if (path.is_valid()) {

770 if (type <= PATH)

771 return path.begin;

772 cur = path.end();

773 }

774

775 if (query.is_valid()) {

776 if (type < QUERY \|\| (type == QUERY && include_delimiter))

777 return query.begin - 1; // Back over delimiter.

778 if (type == QUERY)

779 return query.begin; // Don't want delimiter counted.

780 cur = query.end();

781 }

782

783 if (ref.is_valid()) {

784 if (type == REF && !include_delimiter)

785 return ref.begin; // Back over delimiter.

786

787 // When there is a ref and we get here, the component we wanted was before

788 // this and not found, so we always know the beginning of the ref is right.

789 return ref.begin - 1; // Don't want delimiter counted.

790 }

791

792 return cur;

793 }

794

795 bool ExtractScheme(const char* url, int url_len, Component* scheme) {

796 return DoExtractScheme(url, url_len, scheme);

797 }

798

799 bool ExtractScheme(const base::char16* url, int url_len, Component* scheme) {

800 return DoExtractScheme(url, url_len, scheme);

801 }

802

803 // This handles everything that may be an authority terminator, including

804 // backslash. For special backslash handling see DoParseAfterScheme.

805 bool IsAuthorityTerminator(base::char16 ch) {

806 return IsURLSlash(ch) \|\| ch == '?' \|\| ch == '#';

807 }

808

809 void ExtractFileName(const char* url,

810 const Component& path,

811 Component* file_name) {

812 DoExtractFileName(url, path, file_name);

813 }

814

815 void ExtractFileName(const base::char16* url,

816 const Component& path,

817 Component* file_name) {

818 DoExtractFileName(url, path, file_name);

819 }

820

821 bool ExtractQueryKeyValue(const char* url,

822 Component* query,

823 Component* key,

824 Component* value) {

825 return DoExtractQueryKeyValue(url, query, key, value);

826 }

827

828 bool ExtractQueryKeyValue(const base::char16* url,

829 Component* query,

830 Component* key,

831 Component* value) {

832 return DoExtractQueryKeyValue(url, query, key, value);

833 }

834

835 void ParseAuthority(const char* spec,

836 const Component& auth,

837 Component* username,

838 Component* password,

839 Component* hostname,

840 Component* port_num) {

841 DoParseAuthority(spec, auth, username, password, hostname, port_num);

842 }

843

844 void ParseAuthority(const base::char16* spec,

845 const Component& auth,

846 Component* username,

847 Component* password,

848 Component* hostname,

849 Component* port_num) {

850 DoParseAuthority(spec, auth, username, password, hostname, port_num);

851 }

852

853 int ParsePort(const char* url, const Component& port) {

854 return DoParsePort(url, port);

855 }

856

857 int ParsePort(const base::char16* url, const Component& port) {

858 return DoParsePort(url, port);

859 }

860

861 void ParseStandardURL(const char* url, int url_len, Parsed* parsed) {

862 DoParseStandardURL(url, url_len, parsed);

863 }

864

865 void ParseStandardURL(const base::char16* url, int url_len, Parsed* parsed) {

866 DoParseStandardURL(url, url_len, parsed);

867 }

868

869 void ParsePathURL(const char* url, int url_len, Parsed* parsed) {

870 DoParsePathURL(url, url_len, parsed);

871 }

872

873 void ParsePathURL(const base::char16* url, int url_len, Parsed* parsed) {

874 DoParsePathURL(url, url_len, parsed);

875 }

876

877 void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) {

878 DoParseFileSystemURL(url, url_len, parsed);

879 }

880

881 void ParseFileSystemURL(const base::char16* url, int url_len, Parsed* parsed) {

882 DoParseFileSystemURL(url, url_len, parsed);

883 }

884

885 void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {

886 DoParseMailtoURL(url, url_len, parsed);

887 }

888

889 void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed) {

890 DoParseMailtoURL(url, url_len, parsed);

891 }

892

893 void ParsePathInternal(const char* spec,

894 const Component& path,

895 Component* filepath,

896 Component* query,

897 Component* ref) {

898 ParsePath(spec, path, filepath, query, ref);

899 }

900

901 void ParsePathInternal(const base::char16* spec,

902 const Component& path,

903 Component* filepath,

904 Component* query,

905 Component* ref) {

906 ParsePath(spec, path, filepath, query, ref);

907 }

908

909 void ParseAfterScheme(const char* spec,

910 int spec_len,

911 int after_scheme,

912 Parsed* parsed) {

913 DoParseAfterScheme(spec, spec_len, after_scheme, parsed);

914 }

915

916 void ParseAfterScheme(const base::char16* spec,

917 int spec_len,

918 int after_scheme,

919 Parsed* parsed) {

920 DoParseAfterScheme(spec, spec_len, after_scheme, parsed);

921 }

922

923 } // namespace url_parse

OLD	NEW

« no previous file with comments | « url/url_parse.h ('k') | no next file » | no next file with comments »