OLD | NEW |
(Empty) | |
| 1 // Copyright 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #import "url/mac/url_conversions.h" |
| 6 |
| 7 #import <Foundation/Foundation.h> |
| 8 |
| 9 #include "base/logging.h" |
| 10 #include "base/mac/scoped_nsobject.h" |
| 11 #include "base/strings/sys_string_conversions.h" |
| 12 #include "url/gurl.h" |
| 13 |
| 14 #if defined(OS_MACOSX) && !defined(OS_IOS) |
| 15 #include "base/mac/mac_util.h" |
| 16 #include "base/mac/sdk_forward_declarations.h" |
| 17 #endif // defined(OS_MACOSX) && !defined(OS_IOS) |
| 18 |
| 19 #if defined(OS_IOS) |
| 20 #include "base/ios/ios_util.h" |
| 21 #endif // defined(OS_IOS) |
| 22 |
| 23 namespace { |
| 24 |
// Returns the percent-encoded UTF-8 representation of |codePoint|, e.g.
// U+00E9 -> "%C3%A9" and U+1F600 -> "%F0%9F%98%80". One "%XX" triplet is
// produced per UTF-8 byte, using upper-case hex digits.
NSString* PercentEncodedUTF8ForCodePoint(unsigned int codePoint) {
  if (codePoint < 0x80) {
    // Single byte (used here for control characters and DEL).
    return [NSString stringWithFormat:@"%%%02X", codePoint];
  }
  if (codePoint < 0x800) {
    // Two bytes: 110xxxxx 10xxxxxx.
    return [NSString stringWithFormat:@"%%%02X%%%02X",
                                      0xC0 | (codePoint >> 6),
                                      0x80 | (codePoint & 0x3F)];
  }
  if (codePoint < 0x10000) {
    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
    return [NSString stringWithFormat:@"%%%02X%%%02X%%%02X",
                                      0xE0 | (codePoint >> 12),
                                      0x80 | ((codePoint >> 6) & 0x3F),
                                      0x80 | (codePoint & 0x3F)];
  }
  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
  return [NSString stringWithFormat:@"%%%02X%%%02X%%%02X%%%02X",
                                    0xF0 | (codePoint >> 18),
                                    0x80 | ((codePoint >> 12) & 0x3F),
                                    0x80 | ((codePoint >> 6) & 0x3F),
                                    0x80 | (codePoint & 0x3F)];
}

// This function must only be called on OS X 10.7+ or iOS 7+ (both are
// DCHECKed below).
// Returns a valid NSURL (compliant with RFC 1738/1808/2396) from a UTF-8
// encoded std::string, representing a URI, that can contain any characters.
// If the input string represents a fully RFC 3986-compliant URI the resulting
// NSURL will have the minimum conversion applied to be a valid NSURL with
// minimum damage to the intention of the RFC 3986-compliant URI.
// If the string represents a non-standard URI the method will make best-effort
// to normalize it. Note that making a URI standards-compliant while preserving
// its intention is *not* an exact science. This is why W3C *require* sites
// encode their own links as only the site builder knows the true link
// intention.
//
// The conversion runs in four passes over a mutable copy of |string|:
//   1. literal replacement of characters NSURL refuses to accept raw;
//   2. escaping of '%' characters that do not start a pct-encoded triplet;
//   3. pct-encoding of control characters and of everything outside US-ASCII
//      as UTF-8 bytes (surrogate pairs are encoded as one 4-byte sequence);
//   4. escaping of every '#' after the first one.
// Failing to build an NSURL from the result trips a DCHECK.
NSURL* URLWithUTF8String(const std::string& string) {
#if defined(OS_MACOSX) && !defined(OS_IOS)
  DCHECK(base::mac::IsOSLionOrLater());
#endif  // defined(OS_MACOSX) && !defined(OS_IOS)

#if defined(OS_IOS)
  DCHECK(base::ios::IsRunningOnIOS7OrLater());
#endif  // defined(OS_IOS)

  // These are all the characters that NSURL *requires* to be encoded
  // throughout.
  // NSURL strictly enforces RFC 1738 which says:
  //   [certain] characters are unsafe because
  //   gateways and other transport agents are known to sometimes modify
  //   such characters. These characters are "{", "}", "|", "\", "^", "~",
  //   "[", "]", and "`".
  //   All unsafe characters must always be encoded within a URL.
  // However RFC 3986 relaxes the policy and states:
  //   characters in the reserved set [which includes these characters]
  //   are protected from normalization and are therefore safe to be
  //   used by scheme-specific and producer-specific algorithms for
  //   delimiting data subcomponents within a URI.
  // Therefore we *can* expect these characters to appear in URIs but *should*
  // pct-encode them for RFC 1738.
  // The table is laid out as consecutive (raw, escaped) pairs.
  static NSString* const kReplacements[] = {
      @" ",  @"%20",
      @"\"", @"%22",
      @"<",  @"%3C",
      @">",  @"%3E",
      @"[",  @"%5B",  // NSURL will encode [ itself if we don't
      @"\\", @"%5C",
      @"]",  @"%5D",  // NSURL will encode ] itself if we don't
      @"^",  @"%5E",
      @"{",  @"%7B",
      @"|",  @"%7C",
      @"}",  @"%7D",
      @"`",  @"%60"};
  const size_t pairCount = arraysize(kReplacements) / 2;
  base::scoped_nsobject<NSMutableString> newString([[NSMutableString alloc]
      initWithString:(base::SysUTF8ToNSString(string))]);
  for (size_t i = 0; i < pairCount; ++i) {
    [newString replaceOccurrencesOfString:kReplacements[i * 2]
                               withString:kReplacements[i * 2 + 1]
                                  options:NSLiteralSearch
                                    range:NSMakeRange(0, [newString length])];
  }

  // RFC 1738:
  //   The character "%" is unsafe because it is used for
  //   encodings of other characters.
  //   All unsafe characters must always be encoded within a URL.
  // Conclusion; % symbols that do not begin valid pct-encoding sequences should
  // be pct-encoded (converted to %25).
  // However, % symbols that begin pct-encoded sequences should be left
  // untouched as these can appear in RFC 3986-compliant URIs.
  // This pass runs after the table above, so the "%XX" escapes it produced are
  // recognised as valid triplets and left alone.
  [newString replaceOccurrencesOfString:@"%(?![a-fA-F0-9]{2})"
                             withString:@"%25"
                                options:NSRegularExpressionSearch
                                  range:NSMakeRange(0, [newString length])];

  // RFC 1738:
  //   URLs are written only with the graphic printable characters of the
  //   US-ASCII coded character set. The octets 80-FF hexadecimal are not used
  //   in US-ASCII. [...] the octets 00-1F and 7F hexadecimal represent control
  //   characters; these must be encoded.
  // The string is walked from the end towards the start so that replacing a
  // character with a longer escape never shifts the indices still to visit.
  NSUInteger index = [newString length];
  while (index > 0) {
    --index;
    unichar unit = [newString characterAtIndex:index];
    if (unit >= 0x20 && unit < 0x7F) {
      continue;  // Printable US-ASCII is left untouched.
    }

    unsigned int codePoint = unit;
    NSRange range = NSMakeRange(index, 1);
    // -characterAtIndex: yields UTF-16 code units, so a character outside the
    // Basic Multilingual Plane shows up as a high surrogate followed by a low
    // surrogate. The pair has to be merged into one code point before
    // encoding; encoding each half separately would not be valid UTF-8.
    if (unit >= 0xDC00 && unit <= 0xDFFF && index > 0) {
      unichar high = [newString characterAtIndex:index - 1];
      if (high >= 0xD800 && high <= 0xDBFF) {
        codePoint = 0x10000 +
                    ((static_cast<unsigned int>(high) - 0xD800) << 10) +
                    (static_cast<unsigned int>(unit) - 0xDC00);
        --index;
        range = NSMakeRange(index, 2);
      }
    }
    // An unpaired surrogate falls through with its own value and is encoded
    // as three bytes.
    [newString replaceCharactersInRange:range
                             withString:PercentEncodedUTF8ForCodePoint(
                                            codePoint)];
  }

  // RFC 1738:
  //   The character "#" is unsafe and should
  //   always be encoded because it is used in World Wide Web and in other
  //   systems to delimit a URL from a fragment/anchor identifier that might
  //   follow it.
  // Conclusion; valid NSURLs can contain one # symbol only. The first # symbol
  // is likely to be a fragment divider so should be left untouched.
  NSUInteger firstHash = [newString rangeOfString:@"#"].location;
  if (firstHash != NSNotFound) {
    NSUInteger searchStart = firstHash + 1;
    NSUInteger searchLength = [newString length] - searchStart;
    if (searchLength > 0) {
      [newString replaceOccurrencesOfString:@"#"
                                 withString:@"%23"
                                    options:NSLiteralSearch
                                      range:NSMakeRange(searchStart,
                                                        searchLength)];
    }
  }

  NSURL* nsUrl = [NSURL URLWithString:newString];
  // Failure to create a valid NSURL is a program error.
  DCHECK(nsUrl);
  return nsUrl;
}
| 150 |
| 151 } // namespace |
| 152 |
// Converts |url| into an NSURL by passing its spec() through
// URLWithUTF8String(). Returns nil when |url| is not valid.
NSURL* NSURLWithGURL(const GURL& url) {
  if (!url.is_valid()) {
    return nil;
  }
  return URLWithUTF8String(url.spec());
}
| 158 |
// Converts |url| into a GURL built from the UTF-8 form of its absolute string.
// A nil |url| yields a default-constructed GURL.
GURL GURLWithNSURL(NSURL* url) {
  if (!url) {
    return GURL();
  }
  NSString* absoluteString = [url absoluteString];
  return GURL([absoluteString UTF8String]);
}
OLD | NEW |