| Index: url/mac/url_conversions.mm
|
| diff --git a/url/mac/url_conversions.mm b/url/mac/url_conversions.mm
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..7cbabadad1600c52aa3fe912e35984a2a7b98159
|
| --- /dev/null
|
| +++ b/url/mac/url_conversions.mm
|
| @@ -0,0 +1,163 @@
|
| +// Copyright 2012 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#import "url/mac/url_conversions.h"
|
| +
|
| +#import <Foundation/Foundation.h>
|
| +
|
| +#include "base/logging.h"
|
| +#include "base/mac/scoped_nsobject.h"
|
| +#include "base/strings/sys_string_conversions.h"
|
| +#include "url/gurl.h"
|
| +
|
| +#if defined(OS_MACOSX) && !defined(OS_IOS)
|
| +#include "base/mac/mac_util.h"
|
| +#include "base/mac/sdk_forward_declarations.h"
|
| +#endif // defined(OS_MACOSX) && !defined(OS_IOS)
|
| +
|
| +#if defined(OS_IOS)
|
| +#include "base/ios/ios_util.h"
|
| +#endif // defined(OS_IOS)
|
| +
|
| +namespace {
|
| +
|
| +// Returns a valid NSURL (compliant with RFC 1738/1808/2396) from a UTF-8
|
| +// std::string representing a URI, which may contain arbitrary characters.
|
| +// The DCHECKs at the top of the body require OS X Lion or later on Mac and
|
| +// iOS 7 or later on iOS.
|
| +// If the input string represents a fully RFC 3986-compliant URI the resulting
|
| +// NSURL will have the minimum conversion applied to be a valid NSURL with
|
| +// minimum damage to the intention of the RFC 3986-compliant URI.
|
| +// If the string represents a non-standard URI the method will make best-effort
|
| +// to normalize it. Note that making a URI standards-compliant while preserving
|
| +// its intention is *not* an exact science. This is why W3C *require* sites
|
| +// encode their own links as only the site builder knows the true link
|
| +// intention.
|
| +NSURL* URLWithUTF8String(const std::string& string) {
|
| +#if defined(OS_MACOSX) && !defined(OS_IOS)
|
| +  DCHECK(base::mac::IsOSLionOrLater());
|
| +#endif  // defined(OS_MACOSX) && !defined(OS_IOS)
|
| +
|
| +#if defined(OS_IOS)
|
| +  DCHECK(base::ios::IsRunningOnIOS7OrLater());
|
| +#endif  // defined(OS_IOS)
|
| +
|
| +  // These are all the characters that NSURL *requires* to be encoded
|
| +  // throughout.
|
| +  // NSURL strictly enforces RFC 1738 which says:
|
| +  //   [certain] characters are unsafe because
|
| +  //   gateways and other transport agents are known to sometimes modify
|
| +  //   such characters. These characters are "{", "}", "|", "\", "^", "~",
|
| +  //   "[", "]", and "`".
|
| +  //   All unsafe characters must always be encoded within a URL.
|
| +  // However RFC 3986 relaxes the policy and states:
|
| +  //   characters in the reserved set [which includes these characters]
|
| +  //   are protected from normalization and are therefore safe to be
|
| +  //   used by scheme-specific and producer-specific algorithms for
|
| +  //   delimiting data subcomponents within a URI.
|
| +  // Therefore we *can* expect these characters to appear in URIs but *should*
|
| +  // pct-encode them for RFC 1738.
|
| +  // The table is laid out as (character, pct-encoding) pairs.
|
| +  static NSString* replacements[] = {
|
| +      @" ", @"%20",
|
| +      @"\"", @"%22",
|
| +      @"<", @"%3C",
|
| +      @">", @"%3E",
|
| +      @"[", @"%5B",  // NSURL will encode [ itself if we don't
|
| +      @"\\", @"%5C",
|
| +      @"]", @"%5D",  // NSURL will encode ] itself if we don't
|
| +      @"^", @"%5E",
|
| +      @"{", @"%7B",
|
| +      @"|", @"%7C",
|
| +      @"}", @"%7D",
|
| +      @"`", @"%60"};
|
| +  const int size = arraysize(replacements);
|
| +
|
| +  NSString* unescaped = base::SysUTF8ToNSString(string);
|
| +  // -initWithString: raises on a nil argument. NOTE(review): the conversion
|
| +  // presumably yields nil for bytes that are not valid UTF-8 -- confirm.
|
| +  if (!unescaped)
|
| +    return nil;
|
| +
|
| +  base::scoped_nsobject<NSMutableString> newString(
|
| +      [[NSMutableString alloc] initWithString:unescaped]);
|
| +  for (int i = 0; i < size / 2; i++) {
|
| +    [newString replaceOccurrencesOfString:replacements[i * 2]
|
| +                               withString:replacements[i * 2 + 1]
|
| +                                  options:NSLiteralSearch
|
| +                                    range:NSMakeRange(0, [newString length])];
|
| +  }
|
| +
|
| +  // RFC 1738:
|
| +  //   The character "%" is unsafe because it is used for
|
| +  //   encodings of other characters.
|
| +  //   All unsafe characters must always be encoded within a URL.
|
| +  // Conclusion; % symbols that do not begin valid pct-encoding sequences should
|
| +  // be pct-encoded (converted to %25).
|
| +  // However, % symbols that begin pct-encoded sequences should be left
|
| +  // untouched as these can appear in RFC 3986-compliant URIs.
|
| +  [newString replaceOccurrencesOfString:@"%(?![a-fA-F0-9]{2})"
|
| +                             withString:@"%25"
|
| +                                options:NSRegularExpressionSearch
|
| +                                  range:NSMakeRange(0, [newString length])];
|
| +
|
| +  // RFC 1738:
|
| +  //   URLs are written only with the graphic printable characters of the
|
| +  //   US-ASCII coded character set. The octets 80-FF hexadecimal are not used
|
| +  //   in US-ASCII. [...] the octets 00-1F and 7F hexadecimal represent control
|
| +  //   characters; these must be encoded.
|
| +  // The string is walked from the end towards the start so that replacing a
|
| +  // code unit with a longer pct-encoded string never shifts the indexes that
|
| +  // are still to be visited.
|
| +  NSUInteger i = [newString length];
|
| +  while (i > 0) {
|
| +    --i;
|
| +    const unichar uc = [newString characterAtIndex:i];
|
| +    if (uc >= 0x20 && uc < 0x7F)
|
| +      continue;  // Printable US-ASCII is left untouched.
|
| +
|
| +    NSRange range = NSMakeRange(i, 1);
|
| +    NSString* asString;
|
| +    if (uc == 0x7F || uc < 0x20) {
|
| +      asString = [NSString stringWithFormat:@"%%%02X", uc];
|
| +    } else if (uc <= 0x7ff) {
|
| +      // See row '11 Bits' from http://en.wikipedia.org/wiki/UTF-8#Design
|
| +      asString = [NSString stringWithFormat:@"%%%X%%%X",
|
| +                     (6 << 5) | ((uc >> 6) & ((1 << 5) - 1)),
|
| +                     (2 << 6) | (uc & ((1 << 6) - 1))];
|
| +    } else {
|
| +      // NSString stores UTF-16, so a character above U+FFFF shows up as a
|
| +      // lead surrogate (D800-DBFF) followed by a trail surrogate (DC00-DFFF).
|
| +      // Because of the reverse walk the trail surrogate is met first.
|
| +      const unichar lead = (i > 0) ? [newString characterAtIndex:i - 1] : 0;
|
| +      const bool isSurrogatePair = lead >= 0xD800 && lead <= 0xDBFF &&
|
| +                                   uc >= 0xDC00 && uc <= 0xDFFF;
|
| +      if (isSurrogatePair) {
|
| +        // Encoding each half on its own would produce two three-byte
|
| +        // sequences, which is not valid UTF-8. Recombine the pair into one
|
| +        // code point and emit the four-byte form instead.
|
| +        // See row '21 bits' from http://en.wikipedia.org/wiki/UTF-8#Design
|
| +        const unsigned int codePoint =
|
| +            0x10000 + ((lead - 0xD800) << 10) + (uc - 0xDC00);
|
| +        asString = [NSString stringWithFormat:@"%%%X%%%X%%%X%%%X",
|
| +                       0xF0 | (codePoint >> 18),
|
| +                       0x80 | ((codePoint >> 12) & 0x3F),
|
| +                       0x80 | ((codePoint >> 6) & 0x3F),
|
| +                       0x80 | (codePoint & 0x3F)];
|
| +        // Consume the lead surrogate together with the trail surrogate.
|
| +        --i;
|
| +        range = NSMakeRange(i, 2);
|
| +      } else {
|
| +        // See row '16 bits' from http://en.wikipedia.org/wiki/UTF-8#Design
|
| +        // An unpaired surrogate also ends up here and is encoded as is.
|
| +        asString = [NSString stringWithFormat:@"%%%X%%%X%%%X",
|
| +                       (14 << 4) | ((uc >> 12) & ((1 << 4) - 1)),
|
| +                       (2 << 6) | ((uc >> 6) & ((1 << 6) - 1)),
|
| +                       (2 << 6) | (uc & ((1 << 6) - 1))];
|
| +      }
|
| +    }
|
| +    [newString replaceCharactersInRange:range withString:asString];
|
| +  }
|
| +
|
| +  // RFC 1738:
|
| +  //   The character "#" is unsafe and should
|
| +  //   always be encoded because it is used in World Wide Web and in other
|
| +  //   systems to delimit a URL from a fragment/anchor identifier that might
|
| +  //   follow it.
|
| +  // Conclusion; valid NSURLs can contain one # symbol only. The first # symbol
|
| +  // is likely to be a fragment divider so should be left untouched.
|
| +  NSUInteger firstHash = [newString rangeOfString:@"#"].location;
|
| +  if (firstHash != NSNotFound) {
|
| +    NSUInteger searchLength = [newString length] - (firstHash + 1);
|
| +    if (searchLength > 0) {
|
| +      [newString replaceOccurrencesOfString:@"#"
|
| +                                 withString:@"%23"
|
| +                                    options:NSLiteralSearch
|
| +                                      range:NSMakeRange(firstHash + 1,
|
| +                                                        searchLength)];
|
| +    }
|
| +  }
|
| +
|
| +  NSURL* nsUrl = [NSURL URLWithString:newString];
|
| +  // Failure to create a valid NSURL is a program error.
|
| +  DCHECK(nsUrl);
|
| +  return nsUrl;
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +// Converts |url| into an NSURL by escaping its spec with URLWithUTF8String().
|
| +// Yields nil when |url| is not valid.
|
| +NSURL* NSURLWithGURL(const GURL& url) {
|
| +  if (!url.is_valid())
|
| +    return nil;
|
| +  return URLWithUTF8String(url.spec());
|
| +}
|
| +
|
| +// Converts |url| into a GURL built from the UTF-8 form of its absolute string.
|
| +// Returns a default-constructed GURL when |url| is nil or when it has no
|
| +// absolute string.
|
| +GURL GURLWithNSURL(NSURL* url) {
|
| +  // Messaging nil yields nil, so a nil |url| takes the early return as well.
|
| +  NSString* absoluteString = [url absoluteString];
|
| +  if (!absoluteString)
|
| +    return GURL();
|
| +  // Convert through the base helper rather than -UTF8String: that accessor is
|
| +  // nullable, and a NULL char pointer must never be used to build the GURL.
|
| +  return GURL(base::SysNSStringToUTF8(absoluteString));
|
| +}
|
|
|