Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(634)

Unified Diff: url/mac/url_conversions.mm

Issue 719783005: Upstream "nsurl_util.{h,mm}" from Chrome on iOS repository. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Comments from droger. Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « url/mac/url_conversions.h ('k') | url/mac/url_conversions_unittest.mm » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: url/mac/url_conversions.mm
diff --git a/url/mac/url_conversions.mm b/url/mac/url_conversions.mm
new file mode 100644
index 0000000000000000000000000000000000000000..7cbabadad1600c52aa3fe912e35984a2a7b98159
--- /dev/null
+++ b/url/mac/url_conversions.mm
@@ -0,0 +1,163 @@
+// Copyright 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#import "url/mac/url_conversions.h"
+
+#import <Foundation/Foundation.h>
+
+#include "base/logging.h"
+#include "base/mac/scoped_nsobject.h"
+#include "base/strings/sys_string_conversions.h"
+#include "url/gurl.h"
+
+#if defined(OS_MACOSX) && !defined(OS_IOS)
+#include "base/mac/mac_util.h"
+#include "base/mac/sdk_forward_declarations.h"
+#endif // defined(OS_MACOSX) && !defined(OS_IOS)
+
+#if defined(OS_IOS)
+#include "base/ios/ios_util.h"
+#endif // defined(OS_IOS)
+
+namespace {
+
+// Returns a valid NSURL (compliant with RFC 1738/1808/2396) from a UTF-8
+// encoded string representing a URI.
+// The DCHECKs at the top of the function require OS X 10.7+ on Mac and
+// iOS 7+ on iOS.
+// If the input string represents a fully RFC 3986-compliant URI the resulting
+// NSURL will have the minimum conversion applied to be a valid NSURL with
+// minimum damage to the intention of the RFC 3986-compliant URI.
+// If the string represents a non-standard URI the method will make best-effort
+// to normalize it. Note that making a URI standards-compliant while preserving
+// its intention is *not* an exact science. This is why W3C *require* sites
+// encode their own links as only the site builder knows the true link
+// intention.
+// DCHECKs (and returns nil in release builds) if NSURL rejects the result.
+NSURL* URLWithUTF8String(const std::string& string) {
+#if defined(OS_MACOSX) && !defined(OS_IOS)
+  DCHECK(base::mac::IsOSLionOrLater());
+#endif  // defined(OS_MACOSX) && !defined(OS_IOS)
+
+#if defined(OS_IOS)
+  DCHECK(base::ios::IsRunningOnIOS7OrLater());
+#endif  // defined(OS_IOS)
+
+  // These are all the characters that NSURL *requires* to be encoded
+  // throughout.
+  // NSURL strictly enforces RFC 1738 which says:
+  //   [certain] characters are unsafe because
+  //   gateways and other transport agents are known to sometimes modify
+  //   such characters. These characters are "{", "}", "|", "\", "^", "~",
+  //   "[", "]", and "`".
+  //   All unsafe characters must always be encoded within a URL.
+  // However RFC 3986 relaxes the policy and states:
+  //   characters in the reserved set [which includes these characters]
+  //   are protected from normalization and are therefore safe to be
+  //   used by scheme-specific and producer-specific algorithms for
+  //   delimiting data subcomponents within a URI.
+  // Therefore we *can* expect these characters to appear in URIs but *should*
+  // pct-encode them for RFC 1738.
+  // The table is laid out as (literal, replacement) pairs.
+  static NSString* const replacements[] = {
+      @" ", @"%20",
+      @"\"", @"%22",
+      @"<", @"%3C",
+      @">", @"%3E",
+      @"[", @"%5B",  // NSURL will encode [ itself if we don't
+      @"\\", @"%5C",
+      @"]", @"%5D",  // NSURL will encode ] itself if we don't
+      @"^", @"%5E",
+      @"{", @"%7B",
+      @"|", @"%7C",
+      @"}", @"%7D",
+      @"`", @"%60"};
+  const size_t pairCount = arraysize(replacements) / 2;
+  base::scoped_nsobject<NSMutableString> newString([[NSMutableString alloc]
+      initWithString:(base::SysUTF8ToNSString(string))]);
+  for (size_t pair = 0; pair < pairCount; pair++) {
+    [newString replaceOccurrencesOfString:replacements[pair * 2]
+                               withString:replacements[pair * 2 + 1]
+                                  options:NSLiteralSearch
+                                    range:NSMakeRange(0, [newString length])];
+  }
+
+  // RFC 1738:
+  //   The character "%" is unsafe because it is used for
+  //   encodings of other characters.
+  //   All unsafe characters must always be encoded within a URL.
+  // Conclusion; % symbols that do not begin valid pct-encoding sequences should
+  // be pct-encoded (converted to %25).
+  // However, % symbols that begin pct-encoded sequences should be left
+  // untouched as these can appear in RFC 3986-compliant URIs.
+  // This pass runs after the table above, so the "%XX" sequences inserted
+  // there are already valid and are left alone.
+  [newString replaceOccurrencesOfString:@"%(?![a-fA-F0-9]{2})"
+                             withString:@"%25"
+                                options:NSRegularExpressionSearch
+                                  range:NSMakeRange(0, [newString length])];
+
+  // RFC 1738:
+  //   URLs are written only with the graphic printable characters of the
+  //   US-ASCII coded character set. The octets 80-FF hexadecimal are not used
+  //   in US-ASCII. [...] the octets 00-1F and 7F hexadecimal represent control
+  //   characters; these must be encoded.
+  //
+  // The string is iterated in reverse, one UTF-16 code unit at a time, to
+  // allow easy replacement of fragments with longer strings: indices below
+  // |i| are unaffected by a replacement at |i|. |i| is unsigned, so
+  // decrementing past zero wraps around and fails the |i < length| test; this
+  // also makes the loop a no-op for an empty string.
+  const NSUInteger length = [newString length];
+  for (NSUInteger i = length - 1; i < length; i--) {
+    const unichar uc = [newString characterAtIndex:i];
+    if (uc >= 0x20 && uc < 0x7F)
+      continue;  // Printable US-ASCII needs no encoding here.
+
+    NSString* asString;
+    NSRange range = NSMakeRange(i, 1);
+    if (uc == 0x7F || uc < 0x20) {
+      // Control characters: a single pct-encoded octet.
+      asString = [NSString stringWithFormat:@"%%%02X", uc];
+    } else if (uc <= 0x7ff) {
+      // See row '11 Bits' from http://en.wikipedia.org/wiki/UTF-8#Design
+      asString = [NSString stringWithFormat:@"%%%X%%%X",
+                     (6 << 5) | ((uc >> 6) & ((1 << 5) - 1)),
+                     (2 << 6) | (uc & ((1 << 6) - 1))];
+    } else if (uc >= 0xDC00 && uc <= 0xDFFF && i > 0 &&
+               [newString characterAtIndex:i - 1] >= 0xD800 &&
+               [newString characterAtIndex:i - 1] <= 0xDBFF) {
+      // |uc| is the low half of a UTF-16 surrogate pair, i.e. a character
+      // outside the Basic Multilingual Plane. Encoding each half on its own
+      // would yield two 3-byte sequences, which is not valid UTF-8, so the
+      // pair is combined into one code point and written as the 4-byte form
+      // (RFC 3629).
+      const unichar high = [newString characterAtIndex:i - 1];
+      const uint32_t codePoint =
+          0x10000u + ((static_cast<uint32_t>(high) - 0xD800u) << 10) +
+          (static_cast<uint32_t>(uc) - 0xDC00u);
+      asString = [NSString stringWithFormat:@"%%%X%%%X%%%X%%%X",
+                     0xF0u | ((codePoint >> 18) & 0x07u),
+                     0x80u | ((codePoint >> 12) & 0x3Fu),
+                     0x80u | ((codePoint >> 6) & 0x3Fu),
+                     0x80u | (codePoint & 0x3Fu)];
+      range = NSMakeRange(i - 1, 2);
+      i--;  // The high surrogate has been consumed as well.
+    } else {
+      // See row '16 bits' from http://en.wikipedia.org/wiki/UTF-8#Design
+      // Unpaired surrogates also end up here, as they always have.
+      asString = [NSString stringWithFormat:@"%%%X%%%X%%%X",
+                     (14 << 4) | ((uc >> 12) & ((1 << 4) - 1)),
+                     (2 << 6) | ((uc >> 6) & ((1 << 6) - 1)),
+                     (2 << 6) | (uc & ((1 << 6) - 1))];
+    }
+    [newString replaceCharactersInRange:range withString:asString];
+  }
+
+  // RFC 1738:
+  //   The character "#" is unsafe and should
+  //   always be encoded because it is used in World Wide Web and in other
+  //   systems to delimit a URL from a fragment/anchor identifier that might
+  //   follow it.
+  // Conclusion; valid NSURLs can contain one # symbol only. The first # symbol
+  // is likely to be a fragment divider so should be left untouched.
+  NSUInteger firstHash = [newString rangeOfString:@"#"].location;
+  if (firstHash != NSNotFound) {
+    NSUInteger searchLength = [newString length] - (firstHash + 1);
+    if (searchLength > 0) {
+      [newString replaceOccurrencesOfString:@"#"
+                                 withString:@"%23"
+                                    options:NSLiteralSearch
+                                      range:NSMakeRange(firstHash + 1,
+                                                        searchLength)];
+    }
+  }
+
+  NSURL* nsUrl = [NSURL URLWithString:newString];
+  // Failure to create a valid NSURL is a program error.
+  DCHECK(nsUrl);
+  return nsUrl;
+}
+
+} // namespace
+
+// Converts |url| to an NSURL by passing its spec through URLWithUTF8String().
+// Returns nil when |url| is not valid.
+NSURL* NSURLWithGURL(const GURL& url) {
+  if (!url.is_valid())
+    return nil;
+  return URLWithUTF8String(url.spec());
+}
+
+// Converts |url| to a GURL built from the UTF-8 form of its absolute string.
+// Returns an empty (default-constructed) GURL when |url| is nil or when no
+// UTF-8 representation of its absolute string is available.
+GURL GURLWithNSURL(NSURL* url) {
+  // Messaging nil yields NULL, so this one check covers a nil |url|, a nil
+  // -absoluteString and a failed -UTF8String conversion. A NULL C string must
+  // never reach GURL's string constructor.
+  const char* spec = [[url absoluteString] UTF8String];
+  if (!spec)
+    return GURL();
+  return GURL(spec);
+}
« no previous file with comments | « url/mac/url_conversions.h ('k') | url/mac/url_conversions_unittest.mm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698