Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(158)

Side by Side Diff: net/base/mac/url_conversions.mm

Issue 747773002: Upstream "nsurl_util.{h,mm}" from Chrome on iOS repository. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Updated files to reflect upstreaming. Diff against patch set 1. Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #import "net/base/mac/url_conversions.h"
6
7 #import <Foundation/Foundation.h>
8
9 #include "base/logging.h"
10 #include "base/mac/scoped_nsobject.h"
11 #include "base/strings/sys_string_conversions.h"
12 #include "url/gurl.h"
13
14 #if defined(OS_MACOSX) && !defined(OS_IOS)
15 #include "base/mac/mac_util.h"
16 #include "base/mac/sdk_forward_declarations.h"
17 #endif // defined(OS_MACOSX) && !defined(OS_IOS)
18
19 #if defined(OS_IOS)
20 #include "base/ios/ios_util.h"
21 #endif // defined(OS_IOS)
22
23 namespace {
24
// Converts |string|, a UTF-8 encoded URI that may contain arbitrary Unicode
// characters, into a valid NSURL (compliant with RFC 1738/1808/2396).
//
// This function must only be called on OSX 10.7+ or iOS 4+; note that the
// DCHECKs in the body check for Lion-or-later on OS X and iOS 7-or-later on
// iOS.
//
// If the input string represents a fully RFC 3986-compliant URI, the resulting
// NSURL will have the minimum conversion applied to be a valid NSURL, with
// minimum damage to the intention of the RFC 3986-compliant URI.
// If the string represents a non-standard URI, the function makes a best
// effort to normalize it. Note that making a URI standards-compliant while
// preserving its intention is *not* an exact science. This is why W3C
// *require* sites encode their own links as only the site builder knows the
// true link intention.
//
// Returns the NSURL, or nil if |string| cannot be converted to an NSString or
// if NSURL rejects the escaped string (both cases DCHECK first).
NSURL* URLWithUTF8String(const std::string& string) {
#if defined(OS_MACOSX) && !defined(OS_IOS)
  DCHECK(base::mac::IsOSLionOrLater());
#endif  // defined(OS_MACOSX) && !defined(OS_IOS)

#if defined(OS_IOS)
  // Review note (droger, 2014/11/21 10:49:39): "You can remove this ifdef, and
  // the include on line" [comment truncated on the review page].
  // Review note (erikchen, 2014/12/02 21:19:57): "Done."
  DCHECK(base::ios::IsRunningOnIOS7OrLater());
#endif  // defined(OS_IOS)

  // These are all the characters that NSURL *requires* to be encoded
  // throughout.
  // NSURL strictly enforces RFC 1738 which says:
  //   [certain] characters are unsafe because
  //   gateways and other transport agents are known to sometimes modify
  //   such characters. These characters are "{", "}", "|", "\", "^", "~",
  //   "[", "]", and "`".
  //   All unsafe characters must always be encoded within a URL.
  // However RFC 3986 relaxes the policy and states:
  //   characters in the reserved set [which includes these characters]
  //   are protected from normalization and are therefore safe to be
  //   used by scheme-specific and producer-specific algorithms for
  //   delimiting data subcomponents within a URI.
  // Therefore we *can* expect these characters to appear in URIs but *should*
  // pct-encode them for RFC 1738.
  //
  // Review note (mmenke, 2014/11/21 15:48:55): "We do not need a new method to
  // do this. Set net::" [comment truncated on the review page].
  // Review note (erikchen, 2014/11/22 00:11:39): "mmenke: What are your
  // expectations? (I ask only so" [comment truncated on the review page].
  // Review note (mmenke, 2014/11/22 01:04:13): "I'd like to duplicate as
  // little code as possible." [comment truncated on the review page].
  //
  // The table is laid out as consecutive (literal, replacement) pairs.
  static NSString* const kReplacements[] = {
      @" ",  @"%20",
      @"\"", @"%22",
      @"<",  @"%3C",
      @">",  @"%3E",
      @"[",  @"%5B",  // NSURL will encode [ itself if we don't
      @"\\", @"%5C",
      @"]",  @"%5D",  // NSURL will encode ] itself if we don't
      @"^",  @"%5E",
      @"{",  @"%7B",
      @"|",  @"%7C",
      @"}",  @"%7D",
      @"`",  @"%60"};

  NSString* input = base::SysUTF8ToNSString(string);
  // -[NSMutableString initWithString:] raises on a nil argument, so bail out
  // instead of throwing.
  // NOTE(review): presumably |input| is nil only when |string| is not valid
  // UTF-8 -- confirm against base::SysUTF8ToNSString.
  DCHECK(input);
  if (!input)
    return nil;

  base::scoped_nsobject<NSMutableString> newString(
      [[NSMutableString alloc] initWithString:input]);
  for (size_t i = 0; i + 1 < arraysize(kReplacements); i += 2) {
    [newString replaceOccurrencesOfString:kReplacements[i]
                               withString:kReplacements[i + 1]
                                  options:NSLiteralSearch
                                    range:NSMakeRange(0, [newString length])];
  }

  // RFC 1738:
  //   The character "%" is unsafe because it is used for
  //   encodings of other characters.
  //   All unsafe characters must always be encoded within a URL.
  // Conclusion; % symbols that do not begin valid pct-encoding sequences should
  // be pct-encoded (converted to %25).
  // However, % symbols that begin pct-encoded sequences should be left
  // untouched as these can appear in RFC 3986-compliant URIs.
  [newString replaceOccurrencesOfString:@"%(?![a-fA-F0-9]{2})"
                             withString:@"%25"
                                options:NSRegularExpressionSearch
                                  range:NSMakeRange(0, [newString length])];

  // RFC 1738:
  //   URLs are written only with the graphic printable characters of the
  //   US-ASCII coded character set. The octets 80-FF hexadecimal are not used
  //   in US-ASCII. [...] the octets 00-1F and 7F hexadecimal represent control
  //   characters; these must be encoded.
  //
  // The string is walked from the end towards the start so that replacing a
  // character with a longer escape never shifts the indices still to be
  // visited. |remaining| is the count of UTF-16 code units not yet examined.
  for (NSUInteger remaining = [newString length]; remaining > 0; --remaining) {
    const NSUInteger index = remaining - 1;
    const unichar uc = [newString characterAtIndex:index];
    if (uc >= 0x20 && uc < 0x7F)
      continue;  // Printable US-ASCII needs no encoding.

    unsigned int codePoint = uc;
    NSRange range = NSMakeRange(index, 1);

    // NSString stores UTF-16, so characters beyond U+FFFF appear as a
    // high/low surrogate pair. Combine the pair into a single code point so it
    // is emitted as one 4-byte UTF-8 sequence rather than two separately
    // encoded surrogates (which would not be valid UTF-8).
    if (uc >= 0xDC00 && uc <= 0xDFFF && index > 0) {
      const unichar high = [newString characterAtIndex:index - 1];
      if (high >= 0xD800 && high <= 0xDBFF) {
        codePoint = 0x10000 +
                    ((static_cast<unsigned int>(high) - 0xD800) << 10) +
                    (static_cast<unsigned int>(uc) - 0xDC00);
        range = NSMakeRange(index - 1, 2);
        --remaining;  // The high surrogate has been consumed as well.
      }
    }

    NSString* encoded;
    if (codePoint < 0x80) {
      // Only control characters (00-1F) and DEL (7F) reach this branch.
      encoded = [NSString stringWithFormat:@"%%%02X", codePoint];
    } else if (codePoint <= 0x7FF) {
      // See row '11 Bits' from http://en.wikipedia.org/wiki/UTF-8#Design
      encoded = [NSString stringWithFormat:@"%%%02X%%%02X",
                                           0xC0 | (codePoint >> 6),
                                           0x80 | (codePoint & 0x3F)];
    } else if (codePoint <= 0xFFFF) {
      // See row '16 bits' from http://en.wikipedia.org/wiki/UTF-8#Design
      // Unpaired surrogates also land here and are encoded as-is.
      encoded = [NSString stringWithFormat:@"%%%02X%%%02X%%%02X",
                                           0xE0 | (codePoint >> 12),
                                           0x80 | ((codePoint >> 6) & 0x3F),
                                           0x80 | (codePoint & 0x3F)];
    } else {
      // See row '21 bits' from http://en.wikipedia.org/wiki/UTF-8#Design
      encoded = [NSString stringWithFormat:@"%%%02X%%%02X%%%02X%%%02X",
                                           0xF0 | (codePoint >> 18),
                                           0x80 | ((codePoint >> 12) & 0x3F),
                                           0x80 | ((codePoint >> 6) & 0x3F),
                                           0x80 | (codePoint & 0x3F)];
    }
    [newString replaceCharactersInRange:range withString:encoded];
  }

  // RFC 1738:
  //   The character "#" is unsafe and should
  //   always be encoded because it is used in World Wide Web and in other
  //   systems to delimit a URL from a fragment/anchor identifier that might
  //   follow it.
  // Conclusion; valid NSURLs can contain one # symbol only. The first # symbol
  // is likely to be a fragment divider so should be left untouched.
  const NSUInteger firstHash = [newString rangeOfString:@"#"].location;
  if (firstHash != NSNotFound) {
    const NSUInteger afterHash = firstHash + 1;
    const NSUInteger searchLength = [newString length] - afterHash;
    if (searchLength > 0) {
      [newString replaceOccurrencesOfString:@"#"
                                 withString:@"%23"
                                    options:NSLiteralSearch
                                      range:NSMakeRange(afterHash,
                                                        searchLength)];
    }
  }

  NSURL* nsUrl = [NSURL URLWithString:newString];
  // Failure to create a valid NSURL is a program error.
  DCHECK(nsUrl);
  return nsUrl;
}
150
151 } // namespace
152
153 namespace net {
154
// Converts |url| to an NSURL by passing its spec() through
// URLWithUTF8String(). Returns nil when |url| is not valid.
NSURL* NSURLWithGURL(const GURL& url) {
  if (!url.is_valid()) {
    return nil;
  }
  const std::string& spec = url.spec();
  return URLWithUTF8String(spec);
}
160
// Converts |url| to a GURL built from the UTF-8 form of its absolute string.
// Returns an empty GURL when |url| is nil or when no UTF-8 string can be
// obtained from it.
GURL GURLWithNSURL(NSURL* url) {
  // Messaging nil yields NULL, so this single check covers a nil |url|, a nil
  // -absoluteString, and a NULL -UTF8String. Passing NULL on to GURL would
  // construct a std::string from a null pointer, which is undefined behavior.
  const char* spec = [[url absoluteString] UTF8String];
  if (!spec)
    return GURL();
  return GURL(spec);
}
166
167 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698