Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(274)

Side by Side Diff: url/mac/url_conversions.mm

Issue 719783005: Upstream "nsurl_util.{h,mm}" from Chrome on iOS repository. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Updated files to reflect upstreaming. Diff against patch set 3. Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #import "url/mac/url_conversions.h"
6
7 #import <Foundation/Foundation.h>
8
9 #include "base/logging.h"
10 #include "base/mac/scoped_nsobject.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "url/gurl.h"
13
14 #if defined(OS_MACOSX) && !defined(OS_IOS)
15 #include "base/mac/mac_util.h"
16 #include "base/mac/sdk_forward_declarations.h"
17 #endif // defined(OS_MACOSX) && !defined(OS_IOS)
18
19 #if defined(OS_IOS)
20 #include "base/ios/ios_util.h"
21 #endif // defined(OS_IOS)
22
23 namespace {
24
// Returns the pct-encoded UTF-8 byte sequence ("%XX" per byte) for
// |codePoint|. Handles the 1-, 2-, 3- and 4-byte UTF-8 forms; callers pass
// either a single UTF-16 code unit or a code point already combined from a
// surrogate pair.
NSString* PercentEncodedUTF8(unsigned int codePoint) {
  if (codePoint <= 0x7F) {
    return [NSString stringWithFormat:@"%%%02X", codePoint];
  }
  if (codePoint <= 0x7FF) {
    // 110xxxxx 10xxxxxx
    return [NSString stringWithFormat:@"%%%X%%%X",
                                      0xC0 | (codePoint >> 6),
                                      0x80 | (codePoint & 0x3F)];
  }
  if (codePoint <= 0xFFFF) {
    // 1110xxxx 10xxxxxx 10xxxxxx
    return [NSString stringWithFormat:@"%%%X%%%X%%%X",
                                      0xE0 | (codePoint >> 12),
                                      0x80 | ((codePoint >> 6) & 0x3F),
                                      0x80 | (codePoint & 0x3F)];
  }
  // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  return [NSString stringWithFormat:@"%%%X%%%X%%%X%%%X",
                                    0xF0 | (codePoint >> 18),
                                    0x80 | ((codePoint >> 12) & 0x3F),
                                    0x80 | ((codePoint >> 6) & 0x3F),
                                    0x80 | (codePoint & 0x3F)];
}

// Returns a valid NSURL (compliant with RFC 1738/1808/2396) from a UTF-8
// string representing a URI, which may contain any UTF-8 characters.
// The DCHECKs below require OS X Lion (10.7) or later on Mac and iOS 7 or
// later on iOS.
// If the input string represents a fully RFC 3986-compliant URI the resulting
// NSURL will have the minimum conversion applied to be a valid NSURL with
// minimum damage to the intention of the RFC 3986-compliant URI.
// If the string represents a non-standard URI the method will make best-effort
// to normalize it. Note that making a URI standards-compliant while preserving
// its intention is *not* an exact science. This is why W3C *require* sites
// encode their own links as only the site builder knows the true link
// intention.
NSURL* URLWithUTF8String(const std::string& string) {
#if defined(OS_MACOSX) && !defined(OS_IOS)
  DCHECK(base::mac::IsOSLionOrLater());
#endif  // defined(OS_MACOSX) && !defined(OS_IOS)

#if defined(OS_IOS)
  DCHECK(base::ios::IsRunningOnIOS7OrLater());
#endif  // defined(OS_IOS)

  // These are all the characters that NSURL *requires* to be encoded
  // throughout.
  // NSURL strictly enforces RFC 1738 which says:
  //   [certain] characters are unsafe because
  //   gateways and other transport agents are known to sometimes modify
  //   such characters. These characters are "{", "}", "|", "\", "^", "~",
  //   "[", "]", and "`".
  //   All unsafe characters must always be encoded within a URL.
  // However RFC 3986 relaxes the policy and states:
  //   characters in the reserved set [which includes these characters]
  //   are protected from normalization and are therefore safe to be
  //   used by scheme-specific and producer-specific algorithms for
  //   delimiting data subcomponents within a URI.
  // Therefore we *can* expect these characters to appear in URIs but *should*
  // pct-encode them for RFC 1738.
  // Note: "~" appears in the RFC 1738 list above but is intentionally left
  // out of this table, so it is passed through unescaped.
  // The table is laid out as consecutive (search, replacement) pairs.
  static NSString* replacements[] = {
      @" ",  @"%20",
      @"\"", @"%22",
      @"<",  @"%3C",
      @">",  @"%3E",
      @"[",  @"%5B",  // NSURL will encode [ itself if we don't
      @"\\", @"%5C",
      @"]",  @"%5D",  // NSURL will encode ] itself if we don't
      @"^",  @"%5E",
      @"{",  @"%7B",
      @"|",  @"%7C",
      @"}",  @"%7D",
      @"`",  @"%60"};
  base::scoped_nsobject<NSMutableString> newString([[NSMutableString alloc]
      initWithString:(base::SysUTF8ToNSString(string))]);
  for (size_t i = 0; i + 1 < arraysize(replacements); i += 2) {
    [newString replaceOccurrencesOfString:replacements[i]
                               withString:replacements[i + 1]
                                  options:NSLiteralSearch
                                    range:NSMakeRange(0, [newString length])];
  }

  // RFC 1738:
  //   The character "%" is unsafe because it is used for
  //   encodings of other characters.
  //   All unsafe characters must always be encoded within a URL.
  // Conclusion; % symbols that do not begin valid pct-encoding sequences should
  // be pct-encoded (converted to %25).
  // However, % symbols that begin pct-encoded sequences should be left
  // untouched as these can appear in RFC 3986-compliant URIs.
  [newString replaceOccurrencesOfString:@"%(?![a-fA-F0-9]{2})"
                             withString:@"%25"
                                options:NSRegularExpressionSearch
                                  range:NSMakeRange(0, [newString length])];

  // RFC 1738:
  //   URLs are written only with the graphic printable characters of the
  //   US-ASCII coded character set. The octets 80-FF hexadecimal are not used
  //   in US-ASCII. [...] the octets 00-1F and 7F hexadecimal represent control
  //   characters; these must be encoded.
  // The string is walked from the end towards the start so that replacing a
  // character with a longer escape never shifts the indices still to visit.
  for (NSUInteger i = [newString length]; i-- > 0;) {
    unichar uc = [newString characterAtIndex:i];
    if (uc >= 0x20 && uc < 0x7F)
      continue;  // Printable US-ASCII needs no escaping here.

    NSRange range = NSMakeRange(i, 1);
    unsigned int codePoint = uc;
    // NSString exposes UTF-16 code units, so a character outside the BMP
    // arrives as a high/low surrogate pair. The pair must be combined into one
    // code point and emitted as a single 4-byte UTF-8 sequence; encoding each
    // half separately would produce byte sequences that are not valid UTF-8.
    // An unpaired surrogate falls through and is escaped as a lone code unit.
    if (uc >= 0xDC00 && uc <= 0xDFFF && i > 0) {
      unichar high = [newString characterAtIndex:i - 1];
      if (high >= 0xD800 && high <= 0xDBFF) {
        codePoint = 0x10000 + ((high - 0xD800) << 10) + (uc - 0xDC00);
        --i;  // Consume the high surrogate as part of this replacement.
        range = NSMakeRange(i, 2);
      }
    }
    [newString replaceCharactersInRange:range
                             withString:PercentEncodedUTF8(codePoint)];
  }

  // RFC 1738:
  //   The character "#" is unsafe and should
  //   always be encoded because it is used in World Wide Web and in other
  //   systems to delimit a URL from a fragment/anchor identifier that might
  //   follow it.
  // Conclusion; valid NSURLs can contain one # symbol only. The first # symbol
  // is likely to be a fragment divider so should be left untouched.
  NSUInteger firstHash = [newString rangeOfString:@"#"].location;
  if (firstHash != NSNotFound) {
    NSUInteger searchLength = [newString length] - (firstHash + 1);
    if (searchLength > 0) {
      [newString replaceOccurrencesOfString:@"#"
                                 withString:@"%23"
                                    options:NSLiteralSearch
                                      range:NSMakeRange(firstHash + 1,
                                                        searchLength)];
    }
  }

  NSURL* nsUrl = [NSURL URLWithString:newString];
  // Failure to create a valid NSURL is a program error.
  DCHECK(nsUrl);
  return nsUrl;
}
149
150 } // namespace
151
// Converts |url| to an NSURL by passing its spec to URLWithUTF8String().
// Returns nil when |url| is not valid.
NSURL* NSURLWithGURL(const GURL& url) {
  if (!url.is_valid()) {
    return nil;
  }
  return URLWithUTF8String(url.spec());
}
157
// Converts |url| to a GURL built from the UTF-8 form of its absolute string.
// Returns an empty GURL when |url| is nil or when no absolute string is
// available.
GURL GURLWithNSURL(NSURL* url) {
  // Messaging nil yields NULL, so this single check covers a nil |url|, a nil
  // -absoluteString and a NULL -UTF8String. Constructing the std::string that
  // GURL takes from a NULL pointer would be undefined behavior.
  const char* utf8Spec = [[url absoluteString] UTF8String];
  if (!utf8Spec)
    return GURL();
  return GURL(utf8Spec);
}
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698