Index: net/base/mac/url_conversions.mm |
diff --git a/net/base/mac/url_conversions.mm b/net/base/mac/url_conversions.mm |
new file mode 100644 |
index 0000000000000000000000000000000000000000..34e157d8c6c943c538786beb54f4716fc179163b |
--- /dev/null |
+++ b/net/base/mac/url_conversions.mm |
@@ -0,0 +1,167 @@ |
+// Copyright 2012 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#import "net/base/mac/url_conversions.h" |
+ |
+#import <Foundation/Foundation.h> |
+ |
+#include "base/logging.h" |
+#include "base/mac/scoped_nsobject.h" |
+#include "base/strings/sys_string_conversions.h" |
+#include "url/gurl.h" |
+ |
+#if defined(OS_MACOSX) && !defined(OS_IOS) |
+#include "base/mac/mac_util.h" |
+#include "base/mac/sdk_forward_declarations.h" |
+#endif // defined(OS_MACOSX) && !defined(OS_IOS) |
+ |
+#if defined(OS_IOS) |
+#include "base/ios/ios_util.h" |
+#endif // defined(OS_IOS) |
+ |
+namespace { |
+ |
+// This function must only be called on OSX 10.7+ or iOS 4+. |
+// Returns a valid NSURL (compliant with RFC 1738/1808/2396) from an NSString |
+// that can contain any UTF-8 characters, representing a URI. |
+// If the input string represents a fully RFC 3986-compliant URI the resulting |
+// NSURL will have the minimum conversion applied to be a valid NSURL with |
+// minimum damage to the intention of the RFC 3986-compliant URI. |
+// If the string represents a non-standard URI the method will make best-effort |
+// to normalize it. Note that making a URI standards-compliant while preserving |
+// its intention is *not* an exact science. This is why W3C *require* sites |
+// encode their own links as only the site builder knows the true link |
+// intention. |
+NSURL* URLWithUTF8String(const std::string& string) { |
+  // Overview: the input is turned into an NSMutableString and rewritten in |
+  // four passes (unsafe ASCII characters, stray "%", control and non-ASCII |
+  // characters, surplus "#") before being given to +[NSURL URLWithString:]. |
+  // Returns nil if the input cannot be turned into an NSString or if NSURL |
+  // rejects the rewritten text (both also trip a DCHECK). |
+#if defined(OS_MACOSX) && !defined(OS_IOS) |
+  DCHECK(base::mac::IsOSLionOrLater()); |
+#endif  // defined(OS_MACOSX) && !defined(OS_IOS) |
+ |
+#if defined(OS_IOS) |
droger
2014/11/21 10:49:39
You can remove this ifdef, and the include on line
erikchen
2014/12/02 21:19:57
Done.
|
+  DCHECK(base::ios::IsRunningOnIOS7OrLater()); |
+#endif  // defined(OS_IOS) |
+ |
+  // These are all the characters that NSURL *requires* to be encoded |
+  // throughout. |
+  // NSURL strictly enforces RFC 1738 which says: |
+  //   [certain] characters are unsafe because |
+  //   gateways and other transport agents are known to sometimes modify |
+  //   such characters. These characters are "{", "}", "|", "\", "^", "~", |
+  //   "[", "]", and "`". |
+  //   All unsafe characters must always be encoded within a URL. |
+  // However RFC 3986 relaxes the policy and states: |
+  //   characters in the reserved set [which includes these characters] |
+  //   are protected from normalization and are therefore safe to be |
+  //   used by scheme-specific and producer-specific algorithms for |
+  //   delimiting data subcomponents within a URI. |
+  // Therefore we *can* expect these characters to appear in URIs but *should* |
+  // pct-encode them for RFC 1738. |
+  // Note: "~" appears in the RFC 1738 list quoted above but has no entry in |
+  // the table below, so it is passed through unchanged. |
mmenke
2014/11/21 15:48:55
We do not need a new method to do this. Set net::
erikchen
2014/11/22 00:11:39
mmenke: What are your expectations? (I ask only so
mmenke
2014/11/22 01:04:13
I'd like to duplicate as little code as possible.
|
+  // Flat list of (literal, pct-encoded replacement) pairs. |
+  static NSString* replacements[] = { |
+      @" ", @"%20", |
+      @"\"", @"%22", |
+      @"<", @"%3C", |
+      @">", @"%3E", |
+      @"[", @"%5B",  // NSURL will encode [ itself if we don't |
+      @"\\", @"%5C", |
+      @"]", @"%5D",  // NSURL will encode ] itself if we don't |
+      @"^", @"%5E", |
+      @"{", @"%7B", |
+      @"|", @"%7C", |
+      @"}", @"%7D", |
+      @"`", @"%60"}; |
+ |
+  NSString* input = base::SysUTF8ToNSString(string); |
+  // -[NSMutableString initWithString:] raises on a nil argument. |
+  // NOTE(review): presumably the conversion yields nil for malformed UTF-8 -- |
+  // confirm against base/strings/sys_string_conversions. |
+  DCHECK(input); |
+  if (!input) |
+    return nil; |
+  base::scoped_nsobject<NSMutableString> newString( |
+      [[NSMutableString alloc] initWithString:input]); |
+ |
+  // Pass 1: literal replacement of every character in the table. The range is |
+  // recomputed each time because earlier replacements lengthen the string. |
+  for (size_t pair = 0; pair + 1 < arraysize(replacements); pair += 2) { |
+    [newString replaceOccurrencesOfString:replacements[pair] |
+                               withString:replacements[pair + 1] |
+                                  options:NSLiteralSearch |
+                                    range:NSMakeRange(0, [newString length])]; |
+  } |
+ |
+  // Pass 2. RFC 1738: |
+  //   The character "%" is unsafe because it is used for |
+  //   encodings of other characters. |
+  //   All unsafe characters must always be encoded within a URL. |
+  // Conclusion; % symbols that do not begin valid pct-encoding sequences should |
+  // be pct-encoded (converted to %25). |
+  // However, % symbols that begin pct-encoded sequences should be left |
+  // untouched as these can appear in RFC 3986-compliant URIs. |
+  [newString replaceOccurrencesOfString:@"%(?![a-fA-F0-9]{2})" |
+                             withString:@"%25" |
+                                options:NSRegularExpressionSearch |
+                                  range:NSMakeRange(0, [newString length])]; |
+ |
+  // Pass 3. RFC 1738: |
+  //   URLs are written only with the graphic printable characters of the |
+  //   US-ASCII coded character set. The octets 80-FF hexadecimal are not used |
+  //   in US-ASCII. [...] the octets 00-1F and 7F hexadecimal represent control |
+  //   characters; these must be encoded. |
+  // The string is walked from the end towards the start so that replacing a |
+  // character with a longer escape never shifts positions still to be visited. |
+  NSUInteger index = [newString length]; |
+  while (index > 0) { |
+    --index; |
+    unichar uc = [newString characterAtIndex:index]; |
+    if (uc >= 0x20 && uc < 0x7F) |
+      continue;  // Printable ASCII needs no escaping in this pass. |
+ |
+    unsigned int codePoint = uc; |
+    NSRange range = NSMakeRange(index, 1); |
+    // NSString stores UTF-16: a character above U+FFFF is a high surrogate |
+    // followed by a low surrogate. Escaping each half separately would emit |
+    // two 3-byte sequences, which is not valid UTF-8, so a well-formed pair is |
+    // merged into one code point and escaped as a single 4-byte sequence. |
+    if (uc >= 0xDC00 && uc <= 0xDFFF && index > 0) { |
+      unichar lead = [newString characterAtIndex:index - 1]; |
+      if (lead >= 0xD800 && lead <= 0xDBFF) { |
+        codePoint = 0x10000 + ((static_cast<unsigned int>(lead) - 0xD800) << 10) + |
+                    (static_cast<unsigned int>(uc) - 0xDC00); |
+        --index; |
+        range = NSMakeRange(index, 2); |
+      } |
+    } |
+ |
+    NSString* escaped = nil; |
+    if (codePoint < 0x20 || codePoint == 0x7F) { |
+      // ASCII control character: one octet. |
+      escaped = [NSString stringWithFormat:@"%%%02X", codePoint]; |
+    } else if (codePoint <= 0x7FF) { |
+      // UTF-8 two-octet form: 110xxxxx 10xxxxxx. |
+      escaped = [NSString stringWithFormat:@"%%%02X%%%02X", |
+                                           0xC0 | (codePoint >> 6), |
+                                           0x80 | (codePoint & 0x3F)]; |
+    } else if (codePoint <= 0xFFFF) { |
+      // UTF-8 three-octet form: 1110xxxx 10xxxxxx 10xxxxxx. An unpaired |
+      // surrogate also lands here, as it did before pairs were handled. |
+      escaped = [NSString stringWithFormat:@"%%%02X%%%02X%%%02X", |
+                                           0xE0 | (codePoint >> 12), |
+                                           0x80 | ((codePoint >> 6) & 0x3F), |
+                                           0x80 | (codePoint & 0x3F)]; |
+    } else { |
+      // UTF-8 four-octet form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. |
+      escaped = [NSString stringWithFormat:@"%%%02X%%%02X%%%02X%%%02X", |
+                                           0xF0 | (codePoint >> 18), |
+                                           0x80 | ((codePoint >> 12) & 0x3F), |
+                                           0x80 | ((codePoint >> 6) & 0x3F), |
+                                           0x80 | (codePoint & 0x3F)]; |
+    } |
+    [newString replaceCharactersInRange:range withString:escaped]; |
+  } |
+ |
+  // Pass 4. RFC 1738: |
+  //   The character "#" is unsafe and should |
+  //   always be encoded because it is used in World Wide Web and in other |
+  //   systems to delimit a URL from a fragment/anchor identifier that might |
+  //   follow it. |
+  // Conclusion; valid NSURLs can contain one # symbol only. The first # symbol |
+  // is likely to be a fragment divider so should be left untouched. |
+  NSUInteger firstHash = [newString rangeOfString:@"#"].location; |
+  if (firstHash != NSNotFound) { |
+    NSUInteger searchStart = firstHash + 1; |
+    NSUInteger searchLength = [newString length] - searchStart; |
+    if (searchLength > 0) { |
+      [newString replaceOccurrencesOfString:@"#" |
+                                 withString:@"%23" |
+                                    options:NSLiteralSearch |
+                                      range:NSMakeRange(searchStart, |
+                                                        searchLength)]; |
+    } |
+  } |
+ |
+  NSURL* nsUrl = [NSURL URLWithString:newString]; |
+  // Failure to create a valid NSURL is a program error. |
+  DCHECK(nsUrl); |
+  return nsUrl; |
+} |
+ |
+} // namespace |
+ |
+namespace net { |
+ |
+NSURL* NSURLWithGURL(const GURL& url) { |
+  // Invalid GURLs are mapped to nil rather than being converted. |
+  if (!url.is_valid()) { |
+    return nil; |
+  } |
+  // Valid GURLs are converted from their spec() string. |
+  return URLWithUTF8String(url.spec()); |
+} |
+ |
+GURL GURLWithNSURL(NSURL* url) { |
+  // Builds a GURL from the absolute string of the given NSURL. A nil NSURL |
+  // yields a default-constructed GURL. |
+  // Messaging nil returns NULL, so this single check covers a nil NSURL, a nil |
+  // -absoluteString, and a -UTF8String conversion that produced no bytes. |
+  // NULL must never reach the GURL constructor, which would build a |
+  // std::string from a null pointer. |
+  const char* utf8 = [[url absoluteString] UTF8String]; |
+  if (!utf8) |
+    return GURL(); |
+  return GURL(utf8); |
+} |
+ |
+} // namespace net |