OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 #include "vm/uri.h" | |
6 | |
7 #include "vm/zone.h" | |
8 | |
9 namespace dart { | |
10 | |
// Lower-cases a NUL-terminated string in place.
//
// libc's tolower() is deliberately not used here: only the ASCII letters
// 'A'..'Z' are folded, every other byte is left exactly as it is.
static void StringLower(char* str) {
  for (char* cursor = str; *cursor != '\0'; ++cursor) {
    const char ch = *cursor;
    if (ch < 'A' || ch > 'Z') {
      continue;
    }
    *cursor = ch + ('a' - 'A');
  }
}
20 | |
21 | |
// Returns true if |value| is an "unreserved" character: an ASCII letter,
// an ASCII digit, or one of '-', '.', '_' and '~'.
static bool IsUnreservedChar(intptr_t value) {
  const bool is_lower = (value >= 'a') && (value <= 'z');
  const bool is_upper = (value >= 'A') && (value <= 'Z');
  const bool is_digit = (value >= '0') && (value <= '9');
  if (is_lower || is_upper || is_digit) {
    return true;
  }
  switch (value) {
    case '-': case '.': case '_': case '~':
      return true;
    default:
      return false;
  }
}
31 | |
32 | |
// Returns true if |value| is one of the delimiter characters
//   : / ? # [ ] @ ! $ & ' ( ) * + , ; =
static bool IsDelimiter(intptr_t value) {
  static const char kDelimiters[] = ":/?#[]@!$&'()*+,;=";
  for (const char* delimiter = kDelimiters; *delimiter != '\0'; ++delimiter) {
    if (value == *delimiter) {
      return true;
    }
  }
  return false;
}
45 | |
46 | |
// Returns true if |value| is an ASCII hexadecimal digit: '0'-'9', 'A'-'F'
// or 'a'-'f'.
static bool IsHexDigit(char value) {
  // Note the ">=": a stray ">-" here would compare against the negated
  // character code and accept almost every character below '9'.
  return ((value >= '0' && value <= '9') ||
          (value >= 'A' && value <= 'F') ||
          (value >= 'a' && value <= 'f'));
}
52 | |
53 | |
54 static int HexValue(char digit) { | |
55 if ((digit >= '0' && digit <= '9')) { | |
56 return digit - '0'; | |
57 } | |
58 if ((digit >= 'A' && digit <= 'F')) { | |
59 return digit - 'A' + 10; | |
60 } | |
61 if ((digit >= 'a' && digit <= 'f')) { | |
62 return digit - 'a' + 10; | |
63 } | |
64 UNREACHABLE(); | |
65 return 0; | |
66 } | |
67 | |
68 | |
69 static int GetEscapedValue(const char* str, intptr_t pos, intptr_t len) { | |
70 if (pos + 2 >= len) { | |
71 // Not enough room for a valid escape sequence. | |
72 return -1; | |
73 } | |
74 if (str[pos] != '%') { | |
75 // Escape sequences start with '%'. | |
76 return -1; | |
77 } | |
78 | |
79 char digit1 = str[pos + 1]; | |
80 char digit2 = str[pos + 2]; | |
81 if (!IsHexDigit(digit1) || !IsHexDigit(digit2)) { | |
82 // Invalid escape sequence. Ignore it. | |
ahe
2016/05/31 21:44:42
What does the RFC say about this situation?
turnidge
2016/06/01 20:00:40
I couldn't exactly figure it out, but my best gues
| |
83 return -1; | |
84 } | |
85 return HexValue(digit1) * 16 + HexValue(digit2); | |
86 } | |
87 | |
88 | |
89 static char* NormalizeEscapes(const char* str, intptr_t len) { | |
90 // Allocate the buffer. | |
91 Zone* zone = Thread::Current()->zone(); | |
92 char* buffer = zone->Alloc<char>(len * 3 + 1); // +1 for '\0' | |
Cutch
2016/05/24 23:53:46
nit: (len * 3) + 1
maybe a comment about the * 3
turnidge
2016/05/31 18:25:27
Done.
| |
93 | |
94 // Copy the string, normalizing as we go. | |
95 intptr_t buffer_pos = 0; | |
96 intptr_t pos = 0; | |
97 while (pos < len) { | |
98 int escaped_value = GetEscapedValue(str, pos, len); | |
ahe
2016/05/25 12:44:52
I don't understand this code. As far as I can tell
turnidge
2016/05/27 21:40:05
I think you probably need to look at GetEscapedVal
ahe
2016/05/31 21:44:42
This is embarrassing, I spent a lot of time lookin
| |
99 if (escaped_value >= 0) { | |
100 // If one of the special "unreserved" characters has been | |
101 // escaped, revert the escaping. Otherwise preserve the | |
102 // escaping. | |
103 if (IsUnreservedChar(escaped_value)) { | |
104 buffer[buffer_pos] = escaped_value; | |
105 buffer_pos++; | |
106 } else { | |
107 OS::SNPrint(buffer + buffer_pos, 4, "%%%02X", escaped_value); | |
Cutch
2016/05/24 23:53:46
Would copying the the three characters from the or
turnidge
2016/05/27 21:40:06
By reprinting the escaped value we normalize it to
| |
108 buffer_pos += 3; | |
109 } | |
110 pos += 3; | |
111 } else { | |
112 char c = str[pos]; | |
113 // If a delimiter or unreserved character is currently not | |
114 // escaped, preserve that. If there is a busted %-sequence in | |
115 // the input, preserve that too. | |
116 if (c == '%' || IsDelimiter(c) || IsUnreservedChar(c)) { | |
117 buffer[buffer_pos] = c; | |
118 buffer_pos++; | |
119 } else { | |
120 // Escape funky characters. | |
121 OS::SNPrint(buffer + buffer_pos, 4, "%%%02X", c); | |
122 buffer_pos += 3; | |
123 } | |
124 pos++; | |
125 } | |
126 } | |
127 buffer[buffer_pos] = '\0'; | |
128 return buffer; | |
129 } | |
130 | |
131 | |
132 static void ClearParsedUri(ParsedUri* parsed_uri) { | |
133 parsed_uri->scheme = NULL; | |
134 parsed_uri->userinfo = NULL; | |
135 parsed_uri->host = NULL; | |
136 parsed_uri->port = NULL; | |
137 parsed_uri->path = NULL; | |
138 parsed_uri->query = NULL; | |
139 parsed_uri->fragment = NULL; | |
140 } | |
141 | |
142 | |
143 static intptr_t ParseAuthority(const char* authority, ParsedUri* parsed_uri) { | |
144 Zone* zone = Thread::Current()->zone(); | |
145 const char* current = authority; | |
146 intptr_t len = 0; | |
147 | |
148 size_t userinfo_len = strcspn(current, "@/"); | |
149 if (current[userinfo_len] == '@') { | |
150 // The '@' character follows the optional userinfo string. | |
151 parsed_uri->userinfo = NormalizeEscapes(current, userinfo_len); | |
152 current += userinfo_len + 1; | |
153 len += userinfo_len + 1; | |
154 } else { | |
155 parsed_uri->userinfo = NULL; | |
156 } | |
157 | |
158 size_t host_len = strcspn(current, ":/"); | |
159 char* host = NormalizeEscapes(current, host_len); | |
160 StringLower(host); | |
161 parsed_uri->host = host; | |
162 len += host_len; | |
163 | |
164 if (current[host_len] == ':') { | |
165 // The ':' character precedes the optional port string. | |
166 const char* port_start = current + host_len + 1; // +1 for ':' | |
167 size_t port_len = strcspn(port_start, "/"); | |
168 parsed_uri->port = zone->MakeCopyOfStringN(port_start, port_len); | |
169 len += 1 + port_len; // +1 for ':' | |
170 } else { | |
171 parsed_uri->port = NULL; | |
172 } | |
173 return len; | |
174 } | |
175 | |
176 | |
177 // Performs a simple parse of a uri into its components. | |
178 // See RFC 3986 Section 3: Syntax. | |
179 bool ParseUri(const char* uri, ParsedUri* parsed_uri) { | |
180 Zone* zone = Thread::Current()->zone(); | |
181 | |
182 // The first ':' separates the scheme from the rest of the uri. If | |
183 // a ':' occurs after the first '/' it doesn't count. | |
184 size_t scheme_len = strcspn(uri, ":/"); | |
185 const char* rest = uri; | |
186 if (uri[scheme_len] == ':') { | |
187 char* scheme = zone->MakeCopyOfStringN(uri, scheme_len); | |
188 StringLower(scheme); | |
189 parsed_uri->scheme = scheme; | |
190 rest = uri + scheme_len + 1; | |
191 } else { | |
192 parsed_uri->scheme = NULL; | |
193 } | |
194 | |
195 // The first '#' separates the optional fragment | |
196 const char* hash_pos = rest + strcspn(rest, "#"); | |
197 if (*hash_pos == '#') { | |
198 // There is a fragment part. | |
199 const char* fragment_start = hash_pos + 1; | |
200 parsed_uri->fragment = | |
201 NormalizeEscapes(fragment_start, strlen(fragment_start)); | |
202 } else { | |
203 parsed_uri->fragment = NULL; | |
204 } | |
205 | |
206 // The first '?' or '#' separates the hierarchical part from the | |
207 // optional query. | |
208 const char* question_pos = rest + strcspn(rest, "?#"); | |
209 if (*question_pos == '?') { | |
210 // There is a query part. | |
211 const char* query_start = question_pos + 1; | |
212 parsed_uri->query = | |
213 NormalizeEscapes(query_start, (hash_pos - query_start)); | |
214 } else { | |
215 parsed_uri->query = NULL; | |
216 } | |
217 | |
218 const char* path_start = rest; | |
219 if (rest[0] == '/' && rest[1] == '/') { | |
220 // There is an authority part. | |
221 const char* authority_start = rest + 2; // 2 for '//'. | |
222 | |
223 intptr_t authority_len = | |
224 ParseAuthority(authority_start, parsed_uri); | |
225 if (authority_len < 0) { | |
226 ClearParsedUri(parsed_uri); | |
227 return false; | |
228 } | |
229 path_start = authority_start + authority_len; | |
230 } else { | |
231 parsed_uri->userinfo = NULL; | |
232 parsed_uri->host = NULL; | |
233 parsed_uri->port = NULL; | |
234 } | |
235 | |
236 // Double slashes in the path do not parse. | |
237 bool saw_slash = false; | |
238 for (const char* pos = path_start; pos < question_pos; pos++) { | |
239 if (*pos == '/') { | |
240 if (saw_slash) { | |
241 ClearParsedUri(parsed_uri); | |
242 return false; | |
243 } | |
244 saw_slash = true; | |
245 } else { | |
246 saw_slash = false; | |
247 } | |
248 } | |
249 | |
250 // The path is the substring between the authority and the query. | |
251 parsed_uri->path = NormalizeEscapes(path_start, (question_pos - path_start)); | |
252 return true; | |
253 } | |
254 | |
255 | |
256 static char* RemoveLastSegment(char* current, | |
257 const char* base) { | |
258 if (current == base) { | |
259 return current; | |
260 } | |
261 ASSERT(current > base); | |
262 for (current--; current > base; current--) { | |
263 if (*current == '/') { | |
264 // We have found the beginning of the last segment. | |
265 return current; | |
266 } | |
267 } | |
268 ASSERT(current == base); | |
269 return current; | |
270 } | |
271 | |
272 | |
// Returns the length of the first path segment of |input|. A leading '/'
// is counted as part of the segment; the '/' that ends it is not.
static intptr_t SegmentLength(const char* input) {
  // Include initial slash in the segment, if any.
  const intptr_t leading = (input[0] == '/') ? 1 : 0;

  // The segment stops right before the next slash, or at the end of the
  // string.
  return leading + static_cast<intptr_t>(strcspn(input + leading, "/"));
}
285 | |
286 | |
287 // See RFC 3986 Section 5.2.4: Remove Dot Segments. | |
288 static const char* RemoveDotSegments(const char* path) { | |
289 const char* input = path; | |
290 | |
291 // The output path will always be less than or equal to the size of | |
292 // the input path. | |
293 Zone* zone = Thread::Current()->zone(); | |
294 char* buffer = zone->Alloc<char>(strlen(path) + 1); // +1 for '\0' | |
295 char* output = buffer; | |
296 | |
297 while (*input != '\0') { | |
298 if (strncmp("../", input, 3) == 0) { | |
299 // Discard initial "../" from the input. It's junk. | |
300 input += 3; | |
301 | |
302 } else if (strncmp("./", input, 3) == 0) { | |
303 // Discard initial "./" from the input. It's junk. | |
304 input += 2; | |
305 | |
306 } else if (strncmp("/./", input, 3) == 0) { | |
307 // Advance past the "/." part of the input. | |
308 input += 2; | |
309 | |
310 } else if (strcmp("/.", input) == 0) { | |
311 // Pretend the input just contains a "/". | |
312 input = "/"; | |
313 | |
314 } else if (strncmp("/../", input, 4) == 0) { | |
315 // Advance past the "/.." part of the input and remove one | |
316 // segment from the output. | |
317 input += 3; | |
318 output = RemoveLastSegment(output, buffer); | |
319 | |
320 } else if (strcmp("/..", input) == 0) { | |
321 // Pretend the input contains a "/" and remove one segment from | |
322 // the output. | |
323 input = "/"; | |
324 output = RemoveLastSegment(output, buffer); | |
325 | |
326 } else if (strcmp("..", input) == 0) { | |
327 // The input has been reduced to nothing useful. | |
328 input += 2; | |
329 | |
330 } else if (strcmp(".", input) == 0) { | |
331 // The input has been reduced to nothing useful. | |
332 input += 1; | |
333 | |
334 } else { | |
335 intptr_t segment_len = SegmentLength(input); | |
336 strncpy(output, input, segment_len); | |
337 output += segment_len; | |
338 input += segment_len; | |
339 } | |
340 } | |
341 *output = '\0'; | |
342 return buffer; | |
343 } | |
344 | |
345 | |
346 // See RFC 3986 Section 5.2.3: Merge Paths. | |
347 static const char* MergePaths(const char* base_path, const char* ref_path) { | |
348 Zone* zone = Thread::Current()->zone(); | |
349 if (base_path[0] == '\0') { | |
350 // If the base_path is empty, we prepend '/'. | |
351 return zone->PrintToString("/%s", ref_path); | |
352 } | |
353 | |
354 // We need to find the last '/' in base_path. | |
355 char* last_slash = strrchr(base_path, '/'); | |
356 if (last_slash == NULL) { | |
357 // There is no slash in the base_path. Return the ref_path unchanged. | |
358 return ref_path; | |
359 } | |
360 | |
361 // We found a '/' in the base_path. Cut off everything after it and | |
362 // add the ref_path. | |
363 intptr_t truncated_base_len = last_slash - base_path; | |
364 intptr_t ref_path_len = strlen(ref_path); | |
365 intptr_t len = truncated_base_len + ref_path_len + 1; // +1 for '/' | |
366 char* buffer = zone->Alloc<char>(len + 1); // +1 for '\0' | |
367 | |
368 // Copy truncated base. | |
369 strncpy(buffer, base_path, truncated_base_len); | |
370 | |
371 // Add a slash. | |
372 buffer[truncated_base_len] = '/'; | |
373 | |
374 // Copy the ref_path. | |
375 strncpy((buffer + truncated_base_len + 1), ref_path, ref_path_len); | |
376 | |
377 // Add the trailing '\0'. | |
378 buffer[len] = '\0'; | |
379 | |
380 return buffer; | |
381 } | |
382 | |
383 | |
384 static char* BuildUri(const ParsedUri& uri) { | |
385 Zone* zone = Thread::Current()->zone(); | |
386 ASSERT(uri.path != NULL); | |
387 | |
388 const char* fragment = uri.fragment == NULL ? "" : uri.fragment; | |
389 const char* fragment_separator = uri.fragment == NULL ? "" : "#"; | |
390 const char* query = uri.query == NULL ? "" : uri.query; | |
391 const char* query_separator = uri.query == NULL ? "" : "?"; | |
392 | |
393 // If there is no scheme for this uri, just build a relative uri of | |
394 // the form: "path[?query][#fragment]". This is sort of a | |
395 // degenerate case, but it occurs when we resolve relative urls | |
ahe
2016/05/31 21:44:42
I wouldn't call this a degenerate case. It's quite
turnidge
2016/06/01 20:00:40
Corrected the comment.
| |
396 // inside a "dart:" library. | |
397 if (uri.scheme == NULL) { | |
398 ASSERT(uri.userinfo == NULL && uri.host == NULL && uri.port == NULL); | |
399 ASSERT(uri.query == NULL); | |
400 return zone->PrintToString("%s%s%s%s%s", | |
401 uri.path, query_separator, query, | |
402 fragment_separator, fragment); | |
403 } | |
404 | |
405 // Uri with no authority: "scheme:path[?query][#fragment]" | |
406 if (uri.host == NULL) { | |
407 ASSERT(uri.userinfo == NULL && uri.port == NULL); | |
408 return zone->PrintToString("%s:%s%s%s%s%s", | |
409 uri.scheme, uri.path, query_separator, query, | |
410 fragment_separator, fragment); | |
411 } | |
412 | |
413 const char* user = uri.userinfo == NULL ? "" : uri.userinfo; | |
414 const char* user_separator = uri.userinfo == NULL ? "" : "@"; | |
415 const char* port = uri.port == NULL ? "" : uri.port; | |
416 const char* port_separator = uri.port == NULL ? "" : ":"; | |
417 | |
418 // If the path doesn't start with a '/', add one. We need it to | |
419 // separate the path from the authority. | |
420 const char* path_separator = ((uri.path[0] == '\0' || uri.path[0] == '/') | |
421 ? "" : "/"); | |
422 | |
423 // Uri with authority: | |
424 // "scheme://[userinfo@]host[:port][/]path[?query][#fragment]" | |
425 return zone->PrintToString( | |
426 "%s://%s%s%s%s%s%s%s%s%s%s%s", // There is *nothing* wrong with this. | |
427 uri.scheme, user, user_separator, uri.host, port_separator, port, | |
428 path_separator, uri.path, query_separator, query, | |
429 fragment_separator, fragment); | |
430 } | |
431 | |
432 | |
433 // See RFC 3986 Section 5: Reference Resolution | |
434 bool ResolveUri(const char* ref_uri, | |
435 const char* base_uri, | |
436 const char** target_uri) { | |
437 // Parse the reference uri. | |
438 ParsedUri ref; | |
439 if (!ParseUri(ref_uri, &ref)) { | |
440 *target_uri = NULL; | |
441 return false; | |
442 } | |
443 | |
444 ParsedUri target; | |
445 if (ref.scheme != NULL) { | |
446 if (strcmp(ref.scheme, "dart") == 0) { | |
447 Zone* zone = Thread::Current()->zone(); | |
448 *target_uri = zone->MakeCopyOfString(ref_uri); | |
449 return true; | |
450 } | |
451 | |
452 // When the ref_uri specifies a scheme, the base_uri is ignored. | |
453 target.scheme = ref.scheme; | |
454 target.userinfo = ref.userinfo; | |
455 target.host = ref.host; | |
456 target.port = ref.port; | |
457 target.path = RemoveDotSegments(ref.path); | |
458 target.query = ref.query; | |
459 target.fragment = ref.fragment; | |
460 *target_uri = BuildUri(target); | |
461 return true; | |
462 } | |
463 | |
464 // Parse the base uri. | |
465 ParsedUri base; | |
466 if (!ParseUri(base_uri, &base)) { | |
467 *target_uri = NULL; | |
468 return false; | |
469 } | |
470 | |
471 if (base.scheme != NULL && strcmp(base.scheme, "dart") == 0) { | |
Cutch
2016/05/24 23:53:46
nits about parenthesis:
(base.scheme != NULL) &&
turnidge
2016/05/27 21:40:06
Done.
ahe
2016/05/31 21:44:42
This is odd. Why is there a special case for the d
turnidge
2016/06/01 20:00:40
Discussed offline. Sometimes we resolve a relativ
| |
472 Zone* zone = Thread::Current()->zone(); | |
473 *target_uri = zone->MakeCopyOfString(ref_uri); | |
474 return true; | |
475 } | |
476 | |
477 if (ref.host != NULL) { | |
478 // When the ref_uri specifies an authority, we only use the base scheme. | |
479 target.scheme = base.scheme; | |
480 target.userinfo = ref.userinfo; | |
481 target.host = ref.host; | |
482 target.port = ref.port; | |
483 target.path = RemoveDotSegments(ref.path); | |
484 target.query = ref.query; | |
485 target.fragment = ref.fragment; | |
486 *target_uri = BuildUri(target); | |
487 return true; | |
488 } | |
489 | |
490 if (ref.path[0] == '\0') { | |
491 // Empty path. Use most parts of base_uri. | |
492 target.scheme = base.scheme; | |
493 target.userinfo = base.userinfo; | |
494 target.host = base.host; | |
495 target.port = base.port; | |
496 target.path = base.path; | |
497 target.query = ((ref.query == NULL) ? base.query : ref.query); | |
498 target.fragment = ref.fragment; | |
499 *target_uri = BuildUri(target); | |
500 return true; | |
501 | |
502 } else if (ref.path[0] == '/') { | |
503 // Absolute path. ref_path wins. | |
504 target.scheme = base.scheme; | |
505 target.userinfo = base.userinfo; | |
506 target.host = base.host; | |
507 target.port = base.port; | |
508 target.path = RemoveDotSegments(ref.path); | |
509 target.query = ref.query; | |
510 target.fragment = ref.fragment; | |
511 *target_uri = BuildUri(target); | |
512 return true; | |
513 | |
514 } else { | |
515 // Relative path. We need to merge base_path and ref_path. | |
516 target.scheme = base.scheme; | |
517 target.userinfo = base.userinfo; | |
518 target.host = base.host; | |
519 target.port = base.port; | |
520 target.path = RemoveDotSegments(MergePaths(base.path, ref.path)); | |
521 target.query = ref.query; | |
522 target.fragment = ref.fragment; | |
523 *target_uri = BuildUri(target); | |
524 return true; | |
525 } | |
526 } | |
527 | |
528 } // namespace dart | |
OLD | NEW |