net/base/net_util_icu.cc - Issue 1258813002: Implement a new IDN display policy

Side by Side Diff: net/base/net_util_icu.cc

Issue 1258813002: Implement a new IDN display policy (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: add back languages to one more, update comments Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/net_util.h"	5 #include "net/base/net_util.h"

6	6

7 #include <map>

8 #include <vector>	7 #include <vector>

9	8

10 #include "base/i18n/time_formatting.h"	9 #include "base/i18n/time_formatting.h"

11 #include "base/json/string_escape.h"	10 #include "base/json/string_escape.h"

12 #include "base/lazy_instance.h"	11 #include "base/lazy_instance.h"

13 #include "base/logging.h"	12 #include "base/logging.h"

14 #include "base/memory/singleton.h"	13 #include "base/memory/scoped_ptr.h"

15 #include "base/stl_util.h"

16 #include "base/strings/string_tokenizer.h"

17 #include "base/strings/string_util.h"	14 #include "base/strings/string_util.h"

18 #include "base/strings/utf_offset_string_conversions.h"	15 #include "base/strings/utf_offset_string_conversions.h"

19 #include "base/strings/utf_string_conversions.h"	16 #include "base/strings/utf_string_conversions.h"

20 #include "base/time/time.h"	17 #include "base/time/time.h"

21 #include "url/gurl.h"	18 #include "url/gurl.h"

22 #include "third_party/icu/source/common/unicode/uidna.h"	19 #include "third_party/icu/source/common/unicode/uidna.h"

23 #include "third_party/icu/source/common/unicode/uniset.h"	20 #include "third_party/icu/source/common/unicode/uniset.h"

24 #include "third_party/icu/source/common/unicode/uscript.h"

25 #include "third_party/icu/source/common/unicode/uset.h"

26 #include "third_party/icu/source/i18n/unicode/datefmt.h"	21 #include "third_party/icu/source/i18n/unicode/datefmt.h"

27 #include "third_party/icu/source/i18n/unicode/regex.h"	22 #include "third_party/icu/source/i18n/unicode/regex.h"

28 #include "third_party/icu/source/i18n/unicode/ulocdata.h"	23 #include "third_party/icu/source/i18n/unicode/uspoof.h"

29	24

30 using base::Time;	25 using base::Time;

31	26

32 namespace net {	27 namespace net {

33	28

34 namespace {	29 namespace {

35	30

36 typedef std::vector<size_t> Offsets;	31 typedef std::vector<size_t> Offsets;

37	32

38 // Does some simple normalization of scripts so we can allow certain scripts	33 class IDNSpoofChecker {
	Ryan Sleevi 2015/08/28 00:35:15 Document Document jungshik at Google 2015/09/01 19:47:07 Done. Show quoted text On 2015/08/28 00:35:15, Ryan Sleevi wrote: > Document Done.
39 // to exist together.	34 public:

40 // TODO(brettw) bug 880223: we should allow some other languages to be	35 IDNSpoofChecker();

41 // oombined such as Chinese and Latin. We will probably need a more	36 bool check(const base::char16* label, int label_len);
	Ryan Sleevi 2015/08/28 00:35:14 naming nit: These should be called Check DANGER: u naming nit: These should be called Check DANGER: using "int" for buffer length's is quite dangerous! Would a base::StringPiece16 work here (at least to control the danger). Alternatively, would size_t work with a checked cast? Ryan Sleevi 2015/08/28 00:35:15 Document Document jungshik at Google 2015/09/01 19:47:07 Done. Show quoted text On 2015/08/28 00:35:14, Ryan Sleevi wrote: > naming nit: These should be called Check > DANGER: using "int" for buffer length's is quite dangerous! Would a > base::StringPiece16 work here (at least to control the danger). Alternatively, > would size_t work with a checked cast? Done. jungshik at Google 2015/09/01 19:47:07 Done. Show quoted text On 2015/08/28 00:35:15, Ryan Sleevi wrote: > Document Done. jungshik at Google 2015/09/01 19:47:07 I switched to StringPiece16 (I thought about it bu Show quoted text On 2015/08/28 00:35:14, Ryan Sleevi wrote: > naming nit: These should be called Check > DANGER: using "int" for buffer length's is quite dangerous! Would a > base::StringPiece16 work here (at least to control the danger). Alternatively, > would size_t work with a checked cast? I switched to StringPiece16 (I thought about it but didn't bother...).
42 // complicated system of language pairs to have more fine-grained control.	37

43 UScriptCode NormalizeScript(UScriptCode code) {	38 private:

44 switch (code) {	39 USpoofChecker* checker_;

45 case USCRIPT_KATAKANA:	40 DISALLOW_COPY_AND_ASSIGN(IDNSpoofChecker);

46 case USCRIPT_HIRAGANA:	41 };

47 case USCRIPT_KATAKANA_OR_HIRAGANA:	42

48 case USCRIPT_HANGUL: // This one is arguable.	43 base::LazyInstance<IDNSpoofChecker>::Leaky g_idn_spoof_checker =

49 return USCRIPT_HAN;	44 LAZY_INSTANCE_INITIALIZER;

50 default:	45

51 return code;	46 class IDNSpoofCheckerExtra {
	Ryan Sleevi 2015/08/28 00:35:15 Document Document jungshik at Google 2015/09/01 19:47:07 Done. Show quoted text On 2015/08/28 00:35:15, Ryan Sleevi wrote: > Document Done.
52 }	47 public:

	48 IDNSpoofCheckerExtra();

	49 bool check(const base::char16* label, int label_len);
	Ryan Sleevi 2015/08/28 00:35:15 Same comments as above Same comments as above jungshik at Google 2015/09/01 19:47:07 Done. Show quoted text On 2015/08/28 00:35:15, Ryan Sleevi wrote: > Same comments as above Done.
	50

	51 private:

	52 icu::UnicodeSet non_ascii_latin_;

	53 icu::RegexPattern* dangerous_pattern_;

	54 DISALLOW_COPY_AND_ASSIGN(IDNSpoofCheckerExtra);

	55 };

	56

	57 base::LazyInstance<IDNSpoofCheckerExtra>::Leaky g_idn_spoof_checker_extra =

	58 LAZY_INSTANCE_INITIALIZER;

	59

	60 IDNSpoofChecker::IDNSpoofChecker() {

	61 UErrorCode status = U_ZERO_ERROR;

	62 checker_ = uspoof_open(&status);

	63 DCHECK(U_SUCCESS(status)) << "spoof checker failed to open with error: "
	Ryan Sleevi 2015/08/28 00:35:15 Is DCHECK really appropriate? Does this indicate p Is DCHECK really appropriate? Does this indicate programmer error? Or fatal library failure? jungshik at Google 2015/09/01 19:47:07 This should never happen unless for some reason, I Show quoted text On 2015/08/28 00:35:15, Ryan Sleevi wrote: > Is DCHECK really appropriate? Does this indicate programmer error? Or fatal > library failure? This should never happen unless for some reason, ICU data is not available or ICU data is broken/malformed. Unfortunately, there are mysterious crash reports that could only happen with ICU data missing/unavailable/broken on Android / iOS (e.g. internal bug : b/23186531 ). If the ICU data is missing/malformed/.., we're hosed anyway and we can just use CHECK here. Alternatively, we can move along (disabling IDN spoof check and displaying all the IDNs in punycode) hoping that the rest of ICU is somehow all right.
	64 << u_errorName(status);

	65

	66 // Use 'highly restrictive' restriction level to limit the script mixing

	67 // to Latin + Han + {Hiragana + Katakana, Bopomofo, Hangul}.

	68 // See http://www.unicode.org/reports/tr39/#Restriction_Level_Detection

	69 // The default is already highly restrictive, but it is set explicitly.

	70 // TODO(jshin): Firefox uses 'moderately restrictive' by default. Review

	71 // using that, instead.

	72 uspoof_setRestrictionLevel(checker_, USPOOF_HIGHLY_RESTRICTIVE);

	73

	74 // The recommended set and inclusion set come from

	75 // http://unicode.org/reports/tr39/ and

	76 // http://www.unicode.org/Public/security/latest/xidmodifications.txt

	77 // The list can undergo some changes as a new version of Unicode is

	78 // released and we update our copy of ICU.

	79 const icu::UnicodeSet* recommended_set =

	80 uspoof_getRecommendedUnicodeSet(&status);

	81 icu::UnicodeSet allowed_set;

	82 allowed_set.addAll(*recommended_set);

	83 const icu::UnicodeSet* inclusion_set = uspoof_getInclusionUnicodeSet(&status);

	84 allowed_set.addAll(*inclusion_set);

	85

	86 // From UAX 31 Table 6:

	87 // http://www.unicode.org/reports/tr31/#Aspirational_Use_Scripts

	88 const icu::UnicodeSet aspirational_scripts(

	89 UNICODE_STRING_SIMPLE(

	90 "[[:sc=Cans:][:sc=Plrd:][:sc=Mong:][:sc=Tfng:][:sc=Yiii:]]"),

	91 status);

	92 allowed_set.addAll(aspirational_scripts);

	93

	94 // Add 'Black Heart Suit' and 'Circled White Star'.

	95 // TODO(jshin): How about other heart-like characters and Emoji (e.g.

	96 // U+1F600) ?

	97 allowed_set.add(0x2665u);

	98 allowed_set.add(0x272au);

	99

	100 // Remove the following three characters listed in Mozilla's blacklist (

	101 // http://kb.mozillazine.org/Network.IDN.blacklist_chars ) but

	102 // not yet excluded from \|allowed_set\| up to this point:

	103 // Combining Long Solidus Overlay, Hebrew Punctuation Gershayim, and

	104 // Hyphenation Point

	105 allowed_set.remove(0x338u); // Combining Long Solidus Overlay

	106 allowed_set.remove(0x5f4u); // Hebrew Punctuation Gershayim

	107 allowed_set.remove(0x2027u); // Hyphenation Point

	108

	109 // TODO(jshin): Decide what to do with '+' and U+0020. For now, leave

	110 // them out as Mozilla does.

	111 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, &status);

	112

	113 int32_t checks = uspoof_getChecks(checker_, &status);

	114 // Do not allow mixed numbering systems (e.g. ASCII digits and

	115 // Devanagari digits) or invisible characters or multiple occurrences

	116 // there is a script mixing.

	117 checks \|= USPOOF_MIXED_NUMBERS \| USPOOF_AUX_INFO;

	118

	119 // USPOOF_INVISIBLE should be on by this point without being

	120 // explicitly turned on.

	121 DCHECK(checks & USPOOF_INVISIBLE);

	122

	123 // Disable whole-script-confusable check because even 'pax' (Latin)

	124 // and b<u-umlaut>cher cannot pass the test because Cyrillic/Greek have

	125 // confusable characters for all letters in them.

	126 // TODO(jshin): Disabling this check has a downside. One way to alleviate

	127 // is to check against a list of well known good domain names.

	128 checks ^= USPOOF_WHOLE_SCRIPT_CONFUSABLE;

	129

	130 uspoof_setChecks(checker_, checks, &status);

	131 DCHECK(U_SUCCESS(status));

53 }	132 }

54	133

55 bool IsIDNComponentInSingleScript(const base::char16* str, int str_len) {	134 inline bool IDNSpoofChecker::check(const base::char16* label, int label_len) {

56 UScriptCode first_script = USCRIPT_INVALID_CODE;	135 UErrorCode status = U_ZERO_ERROR;

57 bool is_first = true;	136 int32_t results = uspoof_check(checker_, label, label_len, NULL, &status);

	137 DCHECK(U_SUCCESS(status));
	Ryan Sleevi 2015/08/28 00:35:15 Shouldn't you actually handle if this can fail? Shouldn't you actually handle if this can fail? jungshik at Google 2015/09/01 19:47:07 I agree. Done. Show quoted text On 2015/08/28 00:35:15, Ryan Sleevi wrote: > Shouldn't you actually handle if this can fail? I agree. Done.
	138 if (results & USPOOF_ALL_CHECKS)

	139 return false;

58	140

59 int i = 0;	141 // If there's no script mixing, the input passes without any extra check.

60 while (i < str_len) {	142 if (results == USPOOF_ASCII \|\| results == USPOOF_SINGLE_SCRIPT_RESTRICTIVE)

61 unsigned code_point;	143 return true;

62 U16_NEXT(str, i, str_len, code_point);

63	144

64 UErrorCode err = U_ZERO_ERROR;	145 return g_idn_spoof_checker_extra.Get().check(label, label_len);

65 UScriptCode cur_script = uscript_getScript(code_point, &err);

66 if (err != U_ZERO_ERROR)

67 return false; // Report mixed on error.

68 cur_script = NormalizeScript(cur_script);

69

70 // TODO(brettw) We may have to check for USCRIPT_INHERENT as well.

71 if (is_first && cur_script != USCRIPT_COMMON) {

72 first_script = cur_script;

73 is_first = false;

74 } else {

75 if (cur_script != USCRIPT_COMMON && cur_script != first_script)

76 return false;

77 }

78 }

79 return true;

80 }	146 }

81	147

82 // Check if the script of a language can be 'safely' mixed with	148 IDNSpoofCheckerExtra::IDNSpoofCheckerExtra() {

83 // Latin letters in the ASCII range.	149 UErrorCode status = U_ZERO_ERROR;

84 bool IsCompatibleWithASCIILetters(const std::string& lang) {	150 non_ascii_latin_ = icu::UnicodeSet(

85 // For now, just list Chinese, Japanese and Korean (positive list).	151 UNICODE_STRING_SIMPLE("[[:sc=Latn:] - [a-zA-Z]]"), status);

86 // An alternative is negative-listing (languages using Greek and	152

87 // Cyrillic letters), but it can be more dangerous.	153 dangerous_pattern_ = icu::RegexPattern::compile(

88 return !lang.substr(0, 2).compare("zh") \|\|	154 UNICODE_STRING_SIMPLE(

89 !lang.substr(0, 2).compare("ja") \|\|	155 // Lone (out-of-context) katakana no, so, zo, or n

90 !lang.substr(0, 2).compare("ko");	156 // They can be mistaken for a slash.

	157 "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd\\u30be][^\\p{Katakana}]"

	158 // Repeating Japanese accent characters. USPOOF_INVISIBLE

	159 // only checks for a repeated occurrence of the same combining

	160 // mark, but we block a sequence of similar-looking

	161 // Japanese combining marks as well.

	162 "\|[\\u3099-\\u309c][\\u3099-\\u309c]"),

	163 0, status);

	164 DCHECK(U_SUCCESS(status));

91 }	165 }

92	166

93 typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap;	167 inline bool IDNSpoofCheckerExtra::check(const base::char16* label,

	168 int label_len) {

	169 // This is called only if script mixing is detected.

	170 // Limit Latin letters that can be mixed with other scripts to

	171 // ASCII-Latin instead of any Latin.

	172 icu::UnicodeString label_string(FALSE, label, label_len);

	173 if (non_ascii_latin_.containsSome(label_string))

	174 return false;

94	175

95 class LangToExemplarSet {	176 UErrorCode status = U_ZERO_ERROR;

96 public:	177 scoped_ptr<icu::RegexMatcher> dangerous_pattern_matcher(

97 static LangToExemplarSet* GetInstance() {	178 dangerous_pattern_->matcher(label_string, status));

98 return Singleton<LangToExemplarSet>::get();	179 DCHECK(U_SUCCESS(status));

99 }	180 return !dangerous_pattern_matcher->find();

100	181

101 private:	182 // TODO(jshin): Check spoofing attempt against a list of 'good' domains

102 LangToExemplarSetMap map;

103 LangToExemplarSet() { }

104 ~LangToExemplarSet() {

105 STLDeleteContainerPairSecondPointers(map.begin(), map.end());

106 }

107

108 friend class Singleton<LangToExemplarSet>;

109 friend struct DefaultSingletonTraits<LangToExemplarSet>;

110 friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**);

111 friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*);

112

113 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet);

114 };

115

116 bool GetExemplarSetForLang(const std::string& lang,

117 icu::UnicodeSet** lang_set) {

118 const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;

119 LangToExemplarSetMap::const_iterator pos = map.find(lang);

120 if (pos != map.end()) {

121 *lang_set = pos->second;

122 return true;

123 }

124 return false;

125 }

126

127 void SetExemplarSetForLang(const std::string& lang,

128 icu::UnicodeSet* lang_set) {

129 LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;

130 map.insert(std::make_pair(lang, lang_set));

131 }

132

133 static base::LazyInstance<base::Lock>::Leaky

134 g_lang_set_lock = LAZY_INSTANCE_INITIALIZER;

135

136 // Returns true if all the characters in component_characters are used by

137 // the language \|lang\|.

138 bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters,

139 const std::string& lang) {

140 CR_DEFINE_STATIC_LOCAL(

141 const icu::UnicodeSet, kASCIILetters, ('a', 'z'));

142 icu::UnicodeSet* lang_set = nullptr;

143 // We're called from both the UI thread and the history thread.

144 {

145 base::AutoLock lock(g_lang_set_lock.Get());

146 if (!GetExemplarSetForLang(lang, &lang_set)) {

147 UErrorCode status = U_ZERO_ERROR;

148 ULocaleData* uld = ulocdata_open(lang.c_str(), &status);

149 // TODO(jungshik) Turn this check on when the ICU data file is

150 // rebuilt with the minimal subset of locale data for languages

151 // to which Chrome is not localized but which we offer in the list

152 // of languages selectable for Accept-Languages. With the rebuilt ICU

153 // data, ulocdata_open never should fall back to the default locale.

154 // (issue 2078)

155 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING);

156 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) {

157 lang_set = reinterpret_cast<icu::UnicodeSet*>(ulocdata_getExemplarSet(

158 uld, nullptr, 0, ULOCDATA_ES_STANDARD, &status));

159 // On success, if \|lang\| is compatible with ASCII Latin letters, add

160 // them.

161 if (lang_set && IsCompatibleWithASCIILetters(lang))

162 lang_set->addAll(kASCIILetters);

163 }

164

165 if (!lang_set)

166 lang_set = new icu::UnicodeSet(1, 0);

167

168 lang_set->freeze();

169 SetExemplarSetForLang(lang, lang_set);

170 ulocdata_close(uld);

171 }

172 }

173 return !lang_set->isEmpty() && lang_set->containsAll(component_characters);

174 }	183 }

175	184

176 // Returns true if the given Unicode host component is safe to display to the	185 // Returns true if the given Unicode host component is safe to display to the

177 // user.	186 // user.

178 bool IsIDNComponentSafe(const base::char16* str,	187 bool IsIDNComponentSafe(const base::char16* label, int label_len) {

179 int str_len,	188 return g_idn_spoof_checker.Get().check(label, label_len);

180 const std::string& languages) {

181 // Most common cases (non-IDN) do not reach here so that we don't

182 // need a fast return path.

183 // TODO(jungshik) : Check if there's any character inappropriate

184 // (although allowed) for domain names.

185 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and

186 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt

187 // For now, we borrow the list from Mozilla and tweaked it slightly.

188 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because

189 // they're gonna be canonicalized to U+0020 and full stop before

190 // reaching here.)

191 // The original list is available at

192 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and

193 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js# 703

194

195 UErrorCode status = U_ZERO_ERROR;

196 #ifdef U_WCHAR_IS_UTF16

197 icu::UnicodeSet dangerous_characters(

198 icu::UnicodeString(

199 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"

200 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"

201 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"

202 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"

203 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"

204 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"

205 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"

206 L"[\ufffa-\ufffd]\U0001f50f\U0001f510\U0001f512\U0001f513]"),

207 status);

208 DCHECK(U_SUCCESS(status));

209 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(

210 // Lone katakana no, so, or n

211 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"

212 // Repeating Japanese accent characters

213 L"\|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),

214 0, status);

215 #else

216 icu::UnicodeSet dangerous_characters(icu::UnicodeString(

217 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"

218 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"

219 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"

220 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"

221 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"

222 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"

223 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"

224 "[\\ufffa-\\ufffd]\\U0001f50f\\U0001f510\\U0001f512\\U0001f513]", -1,

225 US_INV), status);

226 DCHECK(U_SUCCESS(status));

227 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(

228 // Lone katakana no, so, or n

229 "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]"

230 // Repeating Japanese accent characters

231 "\|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"),

232 0, status);

233 #endif

234 DCHECK(U_SUCCESS(status));

235 icu::UnicodeSet component_characters;

236 icu::UnicodeString component_string(str, str_len);

237 component_characters.addAll(component_string);

238 if (dangerous_characters.containsSome(component_characters))

239 return false;

240

241 DCHECK(U_SUCCESS(status));

242 dangerous_patterns.reset(component_string);

243 if (dangerous_patterns.find())

244 return false;

245

246 // If the language list is empty, the result is completely determined

247 // by whether a component is a single script or not. This will block

248 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are

249 // allowed with \|languages\| (while it blocks Chinese + Latin letters with

250 // an accent as should be the case), but we want to err on the safe side

251 // when \|languages\| is empty.

252 if (languages.empty())

253 return IsIDNComponentInSingleScript(str, str_len);

254

255 // \|common_characters\| is made up of ASCII numbers, hyphen, plus and

256 // underscore that are used across scripts and allowed in domain names.

257 // (sync'd with characters allowed in url_canon_host with square

258 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.

259 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),

260 status);

261 DCHECK(U_SUCCESS(status));

262 // Subtract common characters because they're always allowed so that

263 // we just have to check if a language-specific set contains

264 // the remainder.

265 component_characters.removeAll(common_characters);

266

267 base::StringTokenizer t(languages, ",");

268 while (t.GetNext()) {

269 if (IsComponentCoveredByLang(component_characters, t.token()))

270 return true;

271 }

272 return false;

273 }	189 }

274	190

275 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to	191 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to

276 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().	192 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().

277 //	193 //

278 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with	194 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with

279 // the backward compatibility in mind. What it does:	195 // the backward compatibility in mind. What it does:

280 //	196 //

281 // 1. Use the up-to-date Unicode data.	197 // 1. Use the up-to-date Unicode data.

282 // 2. Define a case folding/mapping with the up-to-date Unicode data as	198 // 2. Define a case folding/mapping with the up-to-date Unicode data as

(...skipping 15 matching lines...) Expand all Loading...
298 // TODO(jungshik): Change options as different parties (browsers,	214 // TODO(jungshik): Change options as different parties (browsers,

299 // registrars, search engines) converge toward a consensus.	215 // registrars, search engines) converge toward a consensus.

300 value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);	216 value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);

301 if (U_FAILURE(err))	217 if (U_FAILURE(err))

302 value = NULL;	218 value = NULL;

303 }	219 }

304	220

305 UIDNA* value;	221 UIDNA* value;

306 };	222 };

307	223

308 static base::LazyInstance<UIDNAWrapper>::Leaky	224 base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER;

309 g_uidna = LAZY_INSTANCE_INITIALIZER;

310	225

311 // Converts one component of a host (between dots) to IDN if safe. The result	226 // Converts one component (label) of a host (between dots) to Unicode if safe.

312 // will be APPENDED to the given output string and will be the same as the input	227 // The result will be APPENDED to the given output string and will be the

313 // if it is not IDN or the IDN is unsafe to display. Returns whether any	228 // same as the input if it is not Punycode or the IDN is unsafe to display.

314 // conversion was performed.	229 // Returns whether any conversion was performed.

315 bool IDNToUnicodeOneComponent(const base::char16* comp,	230 bool IDNToUnicodeOneComponent(const base::char16* comp,

316 size_t comp_len,	231 size_t comp_len,

317 const std::string& languages,

318 base::string16* out) {	232 base::string16* out) {

319 DCHECK(out);	233 DCHECK(out);

320 if (comp_len == 0)	234 if (comp_len == 0)

321 return false;	235 return false;

322	236

323 // Only transform if the input can be an IDN component.	237 // Only transform if the input can be an IDN component.

324 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};	238 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};

325 if ((comp_len > arraysize(kIdnPrefix)) &&	239 if ((comp_len > arraysize(kIdnPrefix)) &&

326 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {	240 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {

327 UIDNA* uidna = g_uidna.Get().value;	241 UIDNA* uidna = g_uidna.Get().value;

(...skipping 10 matching lines...) Expand all Loading...
338 // the conversion again, but with a sufficiently large buffer.	252 // the conversion again, but with a sufficiently large buffer.

339 output_length = uidna_labelToUnicode(	253 output_length = uidna_labelToUnicode(

340 uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length],	254 uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length],

341 output_length, &info, &status);	255 output_length, &info, &status);

342 } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));	256 } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));

343	257

344 if (U_SUCCESS(status) && info.errors == 0) {	258 if (U_SUCCESS(status) && info.errors == 0) {

345 // Converted successfully. Ensure that the converted component	259 // Converted successfully. Ensure that the converted component

346 // can be safely displayed to the user.	260 // can be safely displayed to the user.

347 out->resize(original_length + output_length);	261 out->resize(original_length + output_length);

348 if (IsIDNComponentSafe(out->data() + original_length, output_length,	262 if (IsIDNComponentSafe(out->data() + original_length, output_length))

349 languages))

350 return true;	263 return true;

351 }	264 }

352	265

353 // Something went wrong. Revert to original string.	266 // Something went wrong. Revert to original string.

354 out->resize(original_length);	267 out->resize(original_length);

355 }	268 }

356	269

357 // We get here with no IDN or on error, in which case we just append the	270 // We get here with no IDN or on error, in which case we just append the

358 // literal input.	271 // literal input.

359 out->append(comp, comp_len);	272 out->append(comp, comp_len);

360 return false;	273 return false;

361 }	274 }

362	275

363 // TODO(brettw) bug 734373: check the scripts for each host component and	276 // TODO(brettw) We may want to skip this step in the case of file URLs to

364 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for	277 // allow unicode UNC hostnames regardless of encodings.

365 // scripts that the user has installed. For now, just put the entire

366 // path through IDN. Maybe this feature can be implemented in ICU itself?

367 //

368 // We may want to skip this step in the case of file URLs to allow unicode

369 // UNC hostnames regardless of encodings.

370 base::string16 IDNToUnicodeWithAdjustments(	278 base::string16 IDNToUnicodeWithAdjustments(

371 const std::string& host,	279 const std::string& host,

372 const std::string& languages,

373 base::OffsetAdjuster::Adjustments* adjustments) {	280 base::OffsetAdjuster::Adjustments* adjustments) {

374 if (adjustments)	281 if (adjustments)

375 adjustments->clear();	282 adjustments->clear();

376 // Convert the ASCII input to a base::string16 for ICU.	283 // Convert the ASCII input to a base::string16 for ICU.

377 base::string16 input16;	284 base::string16 input16;

378 input16.reserve(host.length());	285 input16.reserve(host.length());

379 input16.insert(input16.end(), host.begin(), host.end());	286 input16.insert(input16.end(), host.begin(), host.end());

380	287

381 // Do each component of the host separately, since we enforce script matching	288 // Do each component of the host separately, since we enforce script matching

382 // on a per-component basis.	289 // on a per-component basis.

383 base::string16 out16;	290 base::string16 out16;

384 {	291 {

385 for (size_t component_start = 0, component_end;	292 for (size_t component_start = 0, component_end;

386 component_start < input16.length();	293 component_start < input16.length();

387 component_start = component_end + 1) {	294 component_start = component_end + 1) {

388 // Find the end of the component.	295 // Find the end of the component.

389 component_end = input16.find('.', component_start);	296 component_end = input16.find('.', component_start);

390 if (component_end == base::string16::npos)	297 if (component_end == base::string16::npos)

391 component_end = input16.length(); // For getting the last component.	298 component_end = input16.length(); // For getting the last component.

392 size_t component_length = component_end - component_start;	299 size_t component_length = component_end - component_start;

393 size_t new_component_start = out16.length();	300 size_t new_component_start = out16.length();

394 bool converted_idn = false;	301 bool converted_idn = false;

395 if (component_end > component_start) {	302 if (component_end > component_start) {

396 // Add the substring that we just found.	303 // Add the substring that we just found.

397 converted_idn = IDNToUnicodeOneComponent(	304 converted_idn = IDNToUnicodeOneComponent(

398 input16.data() + component_start, component_length, languages,	305 input16.data() + component_start, component_length, &out16);

399 &out16);

400 }	306 }

401 size_t new_component_length = out16.length() - new_component_start;	307 size_t new_component_length = out16.length() - new_component_start;

402	308

403 if (converted_idn && adjustments) {	309 if (converted_idn && adjustments) {

404 adjustments->push_back(base::OffsetAdjuster::Adjustment(	310 adjustments->push_back(base::OffsetAdjuster::Adjustment(

405 component_start, component_length, new_component_length));	311 component_start, component_length, new_component_length));

406 }	312 }

407	313

408 // Need to add the dot we just found (if we found one).	314 // Need to add the dot we just found (if we found one).

409 if (component_end < input16.length())	315 if (component_end < input16.length())

(...skipping 19 matching lines...) Expand all Loading...
429 AdjustComponent(delta, &(parsed->host));	335 AdjustComponent(delta, &(parsed->host));

430 AdjustComponent(delta, &(parsed->port));	336 AdjustComponent(delta, &(parsed->port));

431 AdjustComponent(delta, &(parsed->path));	337 AdjustComponent(delta, &(parsed->path));

432 AdjustComponent(delta, &(parsed->query));	338 AdjustComponent(delta, &(parsed->query));

433 AdjustComponent(delta, &(parsed->ref));	339 AdjustComponent(delta, &(parsed->ref));

434 }	340 }

435	341

436 // Helper for FormatUrlWithOffsets().	342 // Helper for FormatUrlWithOffsets().

437 base::string16 FormatViewSourceUrl(	343 base::string16 FormatViewSourceUrl(

438 const GURL& url,	344 const GURL& url,

439 const std::string& languages,

440 FormatUrlTypes format_types,	345 FormatUrlTypes format_types,

441 UnescapeRule::Type unescape_rules,	346 UnescapeRule::Type unescape_rules,

442 url::Parsed* new_parsed,	347 url::Parsed* new_parsed,

443 size_t* prefix_end,	348 size_t* prefix_end,

444 base::OffsetAdjuster::Adjustments* adjustments) {	349 base::OffsetAdjuster::Adjustments* adjustments) {

445 DCHECK(new_parsed);	350 DCHECK(new_parsed);

446 const char kViewSource[] = "view-source:";	351 const char kViewSource[] = "view-source:";

447 const size_t kViewSourceLength = arraysize(kViewSource) - 1;	352 const size_t kViewSourceLength = arraysize(kViewSource) - 1;

448	353

449 // Format the underlying URL and record adjustments.	354 // Format the underlying URL and record adjustments.

450 const std::string& url_str(url.possibly_invalid_spec());	355 const std::string& url_str(url.possibly_invalid_spec());

451 adjustments->clear();	356 adjustments->clear();

452 base::string16 result(base::ASCIIToUTF16(kViewSource) +	357 base::string16 result(

	358 base::ASCIIToUTF16(kViewSource) +

453 FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)),	359 FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)),

454 languages, format_types, unescape_rules,	360 std::string(), format_types, unescape_rules,

455 new_parsed, prefix_end, adjustments));	361 new_parsed, prefix_end, adjustments));

456 // Revise \|adjustments\| by shifting to the offsets to prefix that the above	362 // Revise \|adjustments\| by shifting to the offsets to prefix that the above

457 // call to FormatUrl didn't get to see.	363 // call to FormatUrl didn't get to see.

458 for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin();	364 for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin();

459 it != adjustments->end(); ++it)	365 it != adjustments->end(); ++it)

460 it->original_offset += kViewSourceLength;	366 it->original_offset += kViewSourceLength;

461	367

462 // Adjust positions of the parsed components.	368 // Adjust positions of the parsed components.

463 if (new_parsed->scheme.is_nonempty()) {	369 if (new_parsed->scheme.is_nonempty()) {

464 // Assume "view-source:real-scheme" as a scheme.	370 // Assume "view-source:real-scheme" as a scheme.

(...skipping 19 matching lines...) Expand all Loading...
484 const std::string& component_text,	390 const std::string& component_text,

485 base::OffsetAdjuster::Adjustments* adjustments) const = 0;	391 base::OffsetAdjuster::Adjustments* adjustments) const = 0;

486	392

487 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an	393 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an

488 // accessible copy constructor in order to call AppendFormattedComponent()	394 // accessible copy constructor in order to call AppendFormattedComponent()

489 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).	395 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).

490 };	396 };

491	397

492 class HostComponentTransform : public AppendComponentTransform {	398 class HostComponentTransform : public AppendComponentTransform {

493 public:	399 public:

494 explicit HostComponentTransform(const std::string& languages)	400 explicit HostComponentTransform() {}

495 : languages_(languages) {

496 }

497	401

498 private:	402 private:

499 base::string16 Execute(	403 base::string16 Execute(

500 const std::string& component_text,	404 const std::string& component_text,

501 base::OffsetAdjuster::Adjustments* adjustments) const override {	405 base::OffsetAdjuster::Adjustments* adjustments) const override {

502 return IDNToUnicodeWithAdjustments(component_text, languages_,	406 return IDNToUnicodeWithAdjustments(component_text, adjustments);

503 adjustments);

504 }	407 }

505

506 const std::string& languages_;

507 };	408 };

508	409

509 class NonHostComponentTransform : public AppendComponentTransform {	410 class NonHostComponentTransform : public AppendComponentTransform {

510 public:	411 public:

511 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules)	412 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules)

512 : unescape_rules_(unescape_rules) {	413 : unescape_rules_(unescape_rules) {

513 }	414 }

514	415

515 private:	416 private:

516 base::string16 Execute(	417 base::string16 Execute(

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
566 if (output_component) {	467 if (output_component) {

567 output_component->begin = static_cast<int>(output_component_begin);	468 output_component->begin = static_cast<int>(output_component_begin);

568 output_component->len =	469 output_component->len =

569 static_cast<int>(output->length() - output_component_begin);	470 static_cast<int>(output->length() - output_component_begin);

570 }	471 }

571 } else if (output_component) {	472 } else if (output_component) {

572 output_component->reset();	473 output_component->reset();

573 }	474 }

574 }	475 }

575	476

576 } // namespace	477 } // anonymous namespace

577	478

578 const FormatUrlType kFormatUrlOmitNothing = 0;	479 const FormatUrlType kFormatUrlOmitNothing = 0;

579 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;	480 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;

580 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;	481 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;

581 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;	482 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;

582 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword \|	483 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword \|

583 kFormatUrlOmitHTTP \| kFormatUrlOmitTrailingSlashOnBareHostname;	484 kFormatUrlOmitHTTP \| kFormatUrlOmitTrailingSlashOnBareHostname;

584	485

585 base::string16 IDNToUnicode(const std::string& host,	486 base::string16 IDNToUnicode(const std::string& host,

586 const std::string& languages) {	487 const std::string& languages) {

587 return IDNToUnicodeWithAdjustments(host, languages, NULL);	488 return IDNToUnicodeWithAdjustments(host, NULL);

588 }	489 }

589	490

590 std::string GetDirectoryListingEntry(const base::string16& name,	491 std::string GetDirectoryListingEntry(const base::string16& name,

591 const std::string& raw_bytes,	492 const std::string& raw_bytes,

592 bool is_dir,	493 bool is_dir,

593 int64_t size,	494 int64_t size,

594 Time modified) {	495 Time modified) {

595 std::string result;	496 std::string result;

596 result.append("<script>addRow(");	497 result.append("<script>addRow(");

597 base::EscapeJSONString(name, true, &result);	498 base::EscapeJSONString(name, true, &result);

(...skipping 26 matching lines...) Expand all Loading...
624	525

625 result.append(");</script>\n");	526 result.append(");</script>\n");

626	527

627 return result;	528 return result;

628 }	529 }

629	530

630 void AppendFormattedHost(const GURL& url,	531 void AppendFormattedHost(const GURL& url,

631 const std::string& languages,	532 const std::string& languages,

632 base::string16* output) {	533 base::string16* output) {

633 AppendFormattedComponent(url.possibly_invalid_spec(),	534 AppendFormattedComponent(url.possibly_invalid_spec(),

634 url.parsed_for_possibly_invalid_spec().host,	535 url.parsed_for_possibly_invalid_spec().host,

635 HostComponentTransform(languages), output, NULL, NULL);	536 HostComponentTransform(), output, NULL, NULL);

636 }	537 }

637	538

638 base::string16 FormatUrlWithOffsets(	539 base::string16 FormatUrlWithOffsets(

639 const GURL& url,	540 const GURL& url,

640 const std::string& languages,	541 const std::string& languages,

641 FormatUrlTypes format_types,	542 FormatUrlTypes format_types,

642 UnescapeRule::Type unescape_rules,	543 UnescapeRule::Type unescape_rules,

643 url::Parsed* new_parsed,	544 url::Parsed* new_parsed,

644 size_t* prefix_end,	545 size_t* prefix_end,

645 std::vector<size_t>* offsets_for_adjustment) {	546 std::vector<size_t>* offsets_for_adjustment) {

646 base::OffsetAdjuster::Adjustments adjustments;	547 base::OffsetAdjuster::Adjustments adjustments;

647 const base::string16& format_url_return_value =	548 const base::string16& format_url_return_value =

648 FormatUrlWithAdjustments(url, languages, format_types, unescape_rules,	549 FormatUrlWithAdjustments(url, std::string(), format_types, unescape_rules,

649 new_parsed, prefix_end, &adjustments);	550 new_parsed, prefix_end, &adjustments);

650 base::OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);	551 base::OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);

651 if (offsets_for_adjustment) {	552 if (offsets_for_adjustment) {

652 std::for_each(	553 std::for_each(

653 offsets_for_adjustment->begin(),	554 offsets_for_adjustment->begin(),

654 offsets_for_adjustment->end(),	555 offsets_for_adjustment->end(),

655 base::LimitOffset<std::string>(format_url_return_value.length()));	556 base::LimitOffset<std::string>(format_url_return_value.length()));

656 }	557 }

657 return format_url_return_value;	558 return format_url_return_value;

658 }	559 }

(...skipping 15 matching lines...) Expand all Loading...
674 *new_parsed = url::Parsed();	575 *new_parsed = url::Parsed();

675	576

676 // Special handling for view-source:. Don't use content::kViewSourceScheme	577 // Special handling for view-source:. Don't use content::kViewSourceScheme

677 // because this library shouldn't depend on chrome.	578 // because this library shouldn't depend on chrome.

678 const char kViewSource[] = "view-source";	579 const char kViewSource[] = "view-source";

679 // Reject "view-source:view-source:..." to avoid deep recursion.	580 // Reject "view-source:view-source:..." to avoid deep recursion.

680 const char kViewSourceTwice[] = "view-source:view-source:";	581 const char kViewSourceTwice[] = "view-source:view-source:";

681 if (url.SchemeIs(kViewSource) &&	582 if (url.SchemeIs(kViewSource) &&

682 !base::StartsWith(url.possibly_invalid_spec(), kViewSourceTwice,	583 !base::StartsWith(url.possibly_invalid_spec(), kViewSourceTwice,

683 base::CompareCase::INSENSITIVE_ASCII)) {	584 base::CompareCase::INSENSITIVE_ASCII)) {

684 return FormatViewSourceUrl(url, languages, format_types,	585 return FormatViewSourceUrl(url, format_types, unescape_rules, new_parsed,

685 unescape_rules, new_parsed, prefix_end,	586 prefix_end, adjustments);

686 adjustments);

687 }	587 }

688	588

689 // We handle both valid and invalid URLs (this will give us the spec	589 // We handle both valid and invalid URLs (this will give us the spec

690 // regardless of validity).	590 // regardless of validity).

691 const std::string& spec = url.possibly_invalid_spec();	591 const std::string& spec = url.possibly_invalid_spec();

692 const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec();	592 const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec();

693	593

694 // Scheme & separators. These are ASCII.	594 // Scheme & separators. These are ASCII.

695 base::string16 url_string;	595 base::string16 url_string;

696 url_string.insert(	596 url_string.insert(

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
746 AppendFormattedComponent(spec, parsed.password,	646 AppendFormattedComponent(spec, parsed.password,

747 NonHostComponentTransform(unescape_rules),	647 NonHostComponentTransform(unescape_rules),

748 &url_string, &new_parsed->password, adjustments);	648 &url_string, &new_parsed->password, adjustments);

749 if (parsed.username.is_valid() \|\| parsed.password.is_valid())	649 if (parsed.username.is_valid() \|\| parsed.password.is_valid())

750 url_string.push_back('@');	650 url_string.push_back('@');

751 }	651 }

752 if (prefix_end)	652 if (prefix_end)

753 *prefix_end = static_cast<size_t>(url_string.length());	653 *prefix_end = static_cast<size_t>(url_string.length());

754	654

755 // Host.	655 // Host.

756 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(languages),	656 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(),

757 &url_string, &new_parsed->host, adjustments);	657 &url_string, &new_parsed->host, adjustments);

758	658

759 // Port.	659 // Port.

760 if (parsed.port.is_nonempty()) {	660 if (parsed.port.is_nonempty()) {

761 url_string.push_back(':');	661 url_string.push_back(':');

762 new_parsed->port.begin = url_string.length();	662 new_parsed->port.begin = url_string.length();

763 url_string.insert(url_string.end(),	663 url_string.insert(url_string.end(),

764 spec.begin() + parsed.port.begin,	664 spec.begin() + parsed.port.begin,

765 spec.begin() + parsed.port.end());	665 spec.begin() + parsed.port.end());

766 new_parsed->port.len = url_string.length() - new_parsed->port.begin;	666 new_parsed->port.len = url_string.length() - new_parsed->port.begin;

(...skipping 55 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
822 base::string16 FormatUrl(const GURL& url,	722 base::string16 FormatUrl(const GURL& url,

823 const std::string& languages,	723 const std::string& languages,

824 FormatUrlTypes format_types,	724 FormatUrlTypes format_types,

825 UnescapeRule::Type unescape_rules,	725 UnescapeRule::Type unescape_rules,

826 url::Parsed* new_parsed,	726 url::Parsed* new_parsed,

827 size_t* prefix_end,	727 size_t* prefix_end,

828 size_t* offset_for_adjustment) {	728 size_t* offset_for_adjustment) {

829 Offsets offsets;	729 Offsets offsets;

830 if (offset_for_adjustment)	730 if (offset_for_adjustment)

831 offsets.push_back(*offset_for_adjustment);	731 offsets.push_back(*offset_for_adjustment);

832 base::string16 result = FormatUrlWithOffsets(url, languages, format_types,	732 base::string16 result =

833 unescape_rules, new_parsed, prefix_end, &offsets);	733 FormatUrlWithOffsets(url, std::string(), format_types, unescape_rules,

	734 new_parsed, prefix_end, &offsets);

834 if (offset_for_adjustment)	735 if (offset_for_adjustment)

835 *offset_for_adjustment = offsets[0];	736 *offset_for_adjustment = offsets[0];

836 return result;	737 return result;

837 }	738 }

838	739

839 } // namespace net	740 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_icu_unittest.cc » ('j') | no next file with comments »