OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #import "net/base/mac/url_conversions.h" | |
6 | |
7 #import <Foundation/Foundation.h> | |
8 | |
9 #include "base/logging.h" | |
10 #include "base/mac/scoped_nsobject.h" | |
11 #include "base/strings/sys_string_conversions.h" | |
12 #include "url/gurl.h" | |
13 | |
14 #if defined(OS_MACOSX) && !defined(OS_IOS) | |
15 #include "base/mac/mac_util.h" | |
16 #include "base/mac/sdk_forward_declarations.h" | |
17 #endif // defined(OS_MACOSX) && !defined(OS_IOS) | |
18 | |
19 #if defined(OS_IOS) | |
20 #include "base/ios/ios_util.h" | |
21 #endif // defined(OS_IOS) | |
22 | |
23 namespace { | |
24 | |
// This function must only be called on OSX 10.7+ or iOS 7+; both requirements
// are DCHECKed at the top of the function.
//
// Returns a valid NSURL (compliant with RFC 1738/1808/2396) built from
// |string|, a UTF-8 encoded URI that can contain any characters. Returns nil
// if |string| cannot be converted to an NSString or if NSURL rejects the
// escaped result.
//
// If the input string represents a fully RFC 3986-compliant URI the resulting
// NSURL will have the minimum conversion applied to be a valid NSURL with
// minimum damage to the intention of the RFC 3986-compliant URI.
// If the string represents a non-standard URI the method will make a
// best-effort attempt to normalize it. Note that making a URI
// standards-compliant while preserving its intention is *not* an exact
// science. This is why W3C *require* sites encode their own links, as only the
// site builder knows the true link intention.
//
// The escaping passes run in a fixed order that matters:
//   1. literal replacement of the characters NSURL refuses,
//   2. escaping of '%' signs that do not start a pct-encoded sequence,
//   3. pct-encoding of control and non-ASCII characters as UTF-8 octets,
//   4. escaping of every '#' after the first one.
// Pass 2 must run before pass 3 so that the '%' signs emitted by pass 3 are
// never re-examined.
NSURL* URLWithUTF8String(const std::string& string) {
#if defined(OS_MACOSX) && !defined(OS_IOS)
  DCHECK(base::mac::IsOSLionOrLater());
#endif  // defined(OS_MACOSX) && !defined(OS_IOS)

#if defined(OS_IOS)
  DCHECK(base::ios::IsRunningOnIOS7OrLater());
#endif  // defined(OS_IOS)

  // -initWithString: must not be handed nil, so bail out early if the
  // conversion produced nothing.
  // NOTE(review): presumably SysUTF8ToNSString returns nil only for input
  // that is not valid UTF-8 -- confirm against its implementation.
  NSString* input = base::SysUTF8ToNSString(string);
  DCHECK(input);
  if (!input)
    return nil;

  // These are all the characters that NSURL *requires* to be encoded
  // throughout.
  // NSURL strictly enforces RFC 1738 which says:
  //   [certain] characters are unsafe because
  //   gateways and other transport agents are known to sometimes modify
  //   such characters. These characters are "{", "}", "|", "\", "^", "~",
  //   "[", "]", and "`".
  //   All unsafe characters must always be encoded within a URL.
  // However RFC 3986 relaxes the policy and states:
  //   characters in the reserved set [which includes these characters]
  //   are protected from normalization and are therefore safe to be
  //   used by scheme-specific and producer-specific algorithms for
  //   delimiting data subcomponents within a URI.
  // Therefore we *can* expect these characters to appear in URIs but *should*
  // pct-encode them for RFC 1738.
  // The table is laid out as (needle, replacement) pairs.
  static NSString* replacements[] = {
      @" ", @"%20",
      @"\"", @"%22",
      @"<", @"%3C",
      @">", @"%3E",
      @"[", @"%5B",  // NSURL will encode [ itself if we don't
      @"\\", @"%5C",
      @"]", @"%5D",  // NSURL will encode ] itself if we don't
      @"^", @"%5E",
      @"{", @"%7B",
      @"|", @"%7C",
      @"}", @"%7D",
      @"`", @"%60"};
  int size = arraysize(replacements);
  base::scoped_nsobject<NSMutableString> newString(
      [[NSMutableString alloc] initWithString:input]);
  for (int i = 0; i < size / 2; i++) {
    [newString replaceOccurrencesOfString:replacements[i * 2]
                               withString:replacements[i * 2 + 1]
                                  options:NSLiteralSearch
                                    range:NSMakeRange(0, [newString length])];
  }

  // RFC 1738:
  //   The character "%" is unsafe because it is used for
  //   encodings of other characters.
  //   All unsafe characters must always be encoded within a URL.
  // Conclusion; % symbols that do not begin valid pct-encoding sequences should
  // be pct-encoded (converted to %25).
  // However, % symbols that begin pct-encoded sequences should be left
  // untouched as these can appear in RFC 3986-compliant URIs.
  [newString replaceOccurrencesOfString:@"%(?![a-fA-F0-9]{2})"
                             withString:@"%25"
                                options:NSRegularExpressionSearch
                                  range:NSMakeRange(0, [newString length])];

  // RFC 1738:
  //   URLs are written only with the graphic printable characters of the
  //   US-ASCII coded character set. The octets 80-FF hexadecimal are not used
  //   in US-ASCII. [...] the octets 00-1F and 7F hexadecimal represent control
  //   characters; these must be encoded.
  // The string is walked back to front so that replacing a code unit with a
  // longer escape sequence never shifts the indices still to be visited.
  // |i| is one past the index being examined, which keeps the unsigned
  // counter from wrapping when the string is empty.
  for (NSUInteger i = [newString length]; i > 0; i--) {
    NSUInteger index = i - 1;
    unichar uc = [newString characterAtIndex:index];
    if (uc >= 0x20 && uc < 0x7F)
      continue;  // Printable US-ASCII is left untouched.

    NSRange range = NSMakeRange(index, 1);
    NSString* escaped = nil;
    if (uc == 0x7F || uc < 0x20) {
      // Control characters are a single octet.
      escaped = [NSString stringWithFormat:@"%%%02X", (unsigned int)uc];
    } else if (uc <= 0x7FF) {
      // Two-octet UTF-8 form: 110xxxxx 10xxxxxx.
      escaped = [NSString stringWithFormat:@"%%%X%%%X",
                                           0xC0u | ((uc >> 6) & 0x1Fu),
                                           0x80u | (uc & 0x3Fu)];
    } else if (uc >= 0xDC00 && uc <= 0xDFFF && index > 0 &&
               [newString characterAtIndex:index - 1] >= 0xD800 &&
               [newString characterAtIndex:index - 1] <= 0xDBFF) {
      // NSString stores characters outside the BMP as a UTF-16 surrogate
      // pair. Encoding each half separately would produce two three-octet
      // sequences, which is not valid UTF-8, so recombine the pair into its
      // code point and emit the four-octet form:
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
      unichar high = [newString characterAtIndex:index - 1];
      unsigned int codePoint = 0x10000u +
                               (((unsigned int)high - 0xD800u) << 10) +
                               ((unsigned int)uc - 0xDC00u);
      escaped = [NSString stringWithFormat:@"%%%X%%%X%%%X%%%X",
                                           0xF0u | (codePoint >> 18),
                                           0x80u | ((codePoint >> 12) & 0x3Fu),
                                           0x80u | ((codePoint >> 6) & 0x3Fu),
                                           0x80u | (codePoint & 0x3Fu)];
      range = NSMakeRange(index - 1, 2);
      i--;  // The high surrogate has been consumed as well.
    } else {
      // Three-octet UTF-8 form: 1110xxxx 10xxxxxx 10xxxxxx.
      // An unpaired surrogate also ends up here and is encoded as-is, which
      // matches the previous behaviour for such malformed input.
      escaped = [NSString stringWithFormat:@"%%%X%%%X%%%X",
                                           0xE0u | ((uc >> 12) & 0x0Fu),
                                           0x80u | ((uc >> 6) & 0x3Fu),
                                           0x80u | (uc & 0x3Fu)];
    }
    [newString replaceCharactersInRange:range withString:escaped];
  }

  // RFC 1738:
  //   The character "#" is unsafe and should
  //   always be encoded because it is used in World Wide Web and in other
  //   systems to delimit a URL from a fragment/anchor identifier that might
  //   follow it.
  // Conclusion; valid NSURLs can contain one # symbol only. The first # symbol
  // is likely to be a fragment divider so should be left untouched.
  NSUInteger firstHash = [newString rangeOfString:@"#"].location;
  if (firstHash != NSNotFound) {
    NSUInteger searchLength = [newString length] - (firstHash + 1);
    if (searchLength > 0) {
      [newString replaceOccurrencesOfString:@"#"
                                 withString:@"%23"
                                    options:NSLiteralSearch
                                      range:NSMakeRange(firstHash + 1,
                                                        searchLength)];
    }
  }

  NSURL* nsUrl = [NSURL URLWithString:newString];
  // Failure to create a valid NSURL is a program error.
  DCHECK(nsUrl);
  return nsUrl;
}
150 | |
151 } // namespace | |
152 | |
153 namespace net { | |
154 | |
// Converts |url| to an NSURL by passing its spec string to the file-local
// URLWithUTF8String() helper. Returns nil when |url| is not valid.
NSURL* NSURLWithGURL(const GURL& url) {
  if (!url.is_valid())
    return nil;
  return URLWithUTF8String(url.spec());
}
160 | |
// Converts |url| to a GURL built from the UTF-8 form of its absolute string.
// Returns an empty GURL when |url| is nil or has no absolute string.
GURL GURLWithNSURL(NSURL* url) {
  // Messaging nil returns NULL, so this single check covers both a nil |url|
  // and a nil -absoluteString. Constructing the GURL's std::string argument
  // from a NULL C string would be undefined behaviour.
  const char* spec = [[url absoluteString] UTF8String];
  if (!spec)
    return GURL();
  return GURL(spec);
}
166 | |
167 } // namespace net | |
OLD | NEW |