net/base/net_util_icu.cc - Issue 642403002: git cl format the first third of the net/base directory

Side by Side Diff: net/base/net_util_icu.cc

Issue 642403002: git cl format the first third of the net/base directory (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Nit Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/net_util.h"	5 #include "net/base/net_util.h"

6	6

7 #include <map>	7 #include <map>

8 #include <vector>	8 #include <vector>

9	9

10 #include "base/i18n/time_formatting.h"	10 #include "base/i18n/time_formatting.h"

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
78 }	78 }

79 return true;	79 return true;

80 }	80 }

81	81

82 // Check if the script of a language can be 'safely' mixed with	82 // Check if the script of a language can be 'safely' mixed with

83 // Latin letters in the ASCII range.	83 // Latin letters in the ASCII range.

84 bool IsCompatibleWithASCIILetters(const std::string& lang) {	84 bool IsCompatibleWithASCIILetters(const std::string& lang) {

85 // For now, just list Chinese, Japanese and Korean (positive list).	85 // For now, just list Chinese, Japanese and Korean (positive list).

86 // An alternative is negative-listing (languages using Greek and	86 // An alternative is negative-listing (languages using Greek and

87 // Cyrillic letters), but it can be more dangerous.	87 // Cyrillic letters), but it can be more dangerous.

88 return !lang.substr(0, 2).compare("zh") \|\|	88 return !lang.substr(0, 2).compare("zh") \|\| !lang.substr(0, 2).compare("ja") \|\|

89 !lang.substr(0, 2).compare("ja") \|\|

90 !lang.substr(0, 2).compare("ko");	89 !lang.substr(0, 2).compare("ko");

91 }	90 }

92	91

93 typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap;	92 typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap;

94	93

95 class LangToExemplarSet {	94 class LangToExemplarSet {

96 public:	95 public:

97 static LangToExemplarSet* GetInstance() {	96 static LangToExemplarSet* GetInstance() {

98 return Singleton<LangToExemplarSet>::get();	97 return Singleton<LangToExemplarSet>::get();

99 }	98 }

100	99

101 private:	100 private:

102 LangToExemplarSetMap map;	101 LangToExemplarSetMap map;

103 LangToExemplarSet() { }	102 LangToExemplarSet() {}

104 ~LangToExemplarSet() {	103 ~LangToExemplarSet() {

105 STLDeleteContainerPairSecondPointers(map.begin(), map.end());	104 STLDeleteContainerPairSecondPointers(map.begin(), map.end());

106 }	105 }

107	106

108 friend class Singleton<LangToExemplarSet>;	107 friend class Singleton<LangToExemplarSet>;

109 friend struct DefaultSingletonTraits<LangToExemplarSet>;	108 friend struct DefaultSingletonTraits<LangToExemplarSet>;

110 friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**);	109 friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**);

111 friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*);	110 friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*);

112	111

113 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet);	112 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet);

114 };	113 };

115	114

116 bool GetExemplarSetForLang(const std::string& lang,	115 bool GetExemplarSetForLang(const std::string& lang,

117 icu::UnicodeSet** lang_set) {	116 icu::UnicodeSet** lang_set) {

118 const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;	117 const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;

119 LangToExemplarSetMap::const_iterator pos = map.find(lang);	118 LangToExemplarSetMap::const_iterator pos = map.find(lang);

120 if (pos != map.end()) {	119 if (pos != map.end()) {

121 *lang_set = pos->second;	120 *lang_set = pos->second;

122 return true;	121 return true;

123 }	122 }

124 return false;	123 return false;

125 }	124 }

126	125

127 void SetExemplarSetForLang(const std::string& lang,	126 void SetExemplarSetForLang(const std::string& lang, icu::UnicodeSet* lang_set) {

128 icu::UnicodeSet* lang_set) {

129 LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;	127 LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;

130 map.insert(std::make_pair(lang, lang_set));	128 map.insert(std::make_pair(lang, lang_set));

131 }	129 }

132	130

133 static base::LazyInstance<base::Lock>::Leaky	131 static base::LazyInstance<base::Lock>::Leaky g_lang_set_lock =

134 g_lang_set_lock = LAZY_INSTANCE_INITIALIZER;	132 LAZY_INSTANCE_INITIALIZER;

135	133

136 // Returns true if all the characters in component_characters are used by	134 // Returns true if all the characters in component_characters are used by

137 // the language \|lang\|.	135 // the language \|lang\|.

138 bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters,	136 bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters,

139 const std::string& lang) {	137 const std::string& lang) {

140 CR_DEFINE_STATIC_LOCAL(	138 CR_DEFINE_STATIC_LOCAL(const icu::UnicodeSet, kASCIILetters, ('a', 'z'));

141 const icu::UnicodeSet, kASCIILetters, ('a', 'z'));

142 icu::UnicodeSet* lang_set = NULL;	139 icu::UnicodeSet* lang_set = NULL;

143 // We're called from both the UI thread and the history thread.	140 // We're called from both the UI thread and the history thread.

144 {	141 {

145 base::AutoLock lock(g_lang_set_lock.Get());	142 base::AutoLock lock(g_lang_set_lock.Get());

146 if (!GetExemplarSetForLang(lang, &lang_set)) {	143 if (!GetExemplarSetForLang(lang, &lang_set)) {

147 UErrorCode status = U_ZERO_ERROR;	144 UErrorCode status = U_ZERO_ERROR;

148 ULocaleData* uld = ulocdata_open(lang.c_str(), &status);	145 ULocaleData* uld = ulocdata_open(lang.c_str(), &status);

149 // TODO(jungshik) Turn this check on when the ICU data file is	146 // TODO(jungshik) Turn this check on when the ICU data file is

150 // rebuilt with the minimal subset of locale data for languages	147 // rebuilt with the minimal subset of locale data for languages

151 // to which Chrome is not localized but which we offer in the list	148 // to which Chrome is not localized but which we offer in the list

152 // of languages selectable for Accept-Languages. With the rebuilt ICU	149 // of languages selectable for Accept-Languages. With the rebuilt ICU

153 // data, ulocdata_open never should fall back to the default locale.	150 // data, ulocdata_open never should fall back to the default locale.

154 // (issue 2078)	151 // (issue 2078)

155 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING);	152 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING);

156 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) {	153 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) {

157 lang_set = reinterpret_cast<icu::UnicodeSet *>(	154 lang_set = reinterpret_cast<icu::UnicodeSet*>(ulocdata_getExemplarSet(

158 ulocdata_getExemplarSet(uld, NULL, 0,	155 uld, NULL, 0, ULOCDATA_ES_STANDARD, &status));

159 ULOCDATA_ES_STANDARD, &status));

160 // If \|lang\| is compatible with ASCII Latin letters, add them.	156 // If \|lang\| is compatible with ASCII Latin letters, add them.

161 if (IsCompatibleWithASCIILetters(lang))	157 if (IsCompatibleWithASCIILetters(lang))

162 lang_set->addAll(kASCIILetters);	158 lang_set->addAll(kASCIILetters);

163 } else {	159 } else {

164 lang_set = new icu::UnicodeSet(1, 0);	160 lang_set = new icu::UnicodeSet(1, 0);

165 }	161 }

166 lang_set->freeze();	162 lang_set->freeze();

167 SetExemplarSetForLang(lang, lang_set);	163 SetExemplarSetForLang(lang, lang_set);

168 ulocdata_close(uld);	164 ulocdata_close(uld);

169 }	165 }

(...skipping 11 matching lines...) Expand all Loading...
181 // TODO(jungshik) : Check if there's any character inappropriate	177 // TODO(jungshik) : Check if there's any character inappropriate

182 // (although allowed) for domain names.	178 // (although allowed) for domain names.

183 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and	179 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and

184 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt	180 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt

185 // For now, we borrow the list from Mozilla and tweaked it slightly.	181 // For now, we borrow the list from Mozilla and tweaked it slightly.

186 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because	182 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because

187 // they're gonna be canonicalized to U+0020 and full stop before	183 // they're gonna be canonicalized to U+0020 and full stop before

188 // reaching here.)	184 // reaching here.)

189 // The original list is available at	185 // The original list is available at

190 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and	186 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and

191 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703	187 // at

	188 // http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703

192	189

193 UErrorCode status = U_ZERO_ERROR;	190 UErrorCode status = U_ZERO_ERROR;

194 #ifdef U_WCHAR_IS_UTF16	191 #ifdef U_WCHAR_IS_UTF16

195 icu::UnicodeSet dangerous_characters(icu::UnicodeString(	192 icu::UnicodeSet dangerous_characters(

196 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"	193 icu::UnicodeString(

197 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"	194 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"

198 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"	195 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"

199 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"	196 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"

200 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"	197 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"

201 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"	198 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"

202 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"	199 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"

203 L"[\ufffa-\ufffd]]"), status);	200 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"

	201 L"[\ufffa-\ufffd]]"),

	202 status);

204 DCHECK(U_SUCCESS(status));	203 DCHECK(U_SUCCESS(status));

205 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(	204 icu::RegexMatcher dangerous_patterns(

206 // Lone katakana no, so, or n	205 icu::UnicodeString(

207 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"	206 // Lone katakana no, so, or n

208 // Repeating Japanese accent characters	207 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"

209 L"\|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),	208 // Repeating Japanese accent characters

	209 L"\|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),

210 0, status);	210 0, status);

211 #else	211 #else

212 icu::UnicodeSet dangerous_characters(icu::UnicodeString(	212 icu::UnicodeSet dangerous_characters(

213 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"	213 icu::UnicodeString(

214 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"	214 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"

215 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"	215 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"

216 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"	216 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"

217 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"	217 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"

218 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"	218 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"

219 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"	219 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe"

220 "[\\ufffa-\\ufffd]]", -1, US_INV), status);	220 "14"

	221 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\uff"

	222 "f9]"

	223 "[\\ufffa-\\ufffd]]",

	224 -1, US_INV),

	225 status);

221 DCHECK(U_SUCCESS(status));	226 DCHECK(U_SUCCESS(status));

222 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(	227 icu::RegexMatcher dangerous_patterns(

223 // Lone katakana no, so, or n	228 icu::UnicodeString(

224 "[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]"	229 // Lone katakana no, so, or n

225 // Repeating Japanese accent characters	230 "[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]"

226 "\|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"),	231 // Repeating Japanese accent characters

	232 "\|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"),

227 0, status);	233 0, status);

228 #endif	234 #endif

229 DCHECK(U_SUCCESS(status));	235 DCHECK(U_SUCCESS(status));

230 icu::UnicodeSet component_characters;	236 icu::UnicodeSet component_characters;

231 icu::UnicodeString component_string(str, str_len);	237 icu::UnicodeString component_string(str, str_len);

232 component_characters.addAll(component_string);	238 component_characters.addAll(component_string);

233 if (dangerous_characters.containsSome(component_characters))	239 if (dangerous_characters.containsSome(component_characters))

234 return false;	240 return false;

235	241

236 DCHECK(U_SUCCESS(status));	242 DCHECK(U_SUCCESS(status));

(...skipping 56 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
293 // TODO(jungshik): Change options as different parties (browsers,	299 // TODO(jungshik): Change options as different parties (browsers,

294 // registrars, search engines) converge toward a consensus.	300 // registrars, search engines) converge toward a consensus.

295 value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);	301 value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);

296 if (U_FAILURE(err))	302 if (U_FAILURE(err))

297 value = NULL;	303 value = NULL;

298 }	304 }

299	305

300 UIDNA* value;	306 UIDNA* value;

301 };	307 };

302	308

303 static base::LazyInstance<UIDNAWrapper>::Leaky	309 static base::LazyInstance<UIDNAWrapper>::Leaky g_uidna =

304 g_uidna = LAZY_INSTANCE_INITIALIZER;	310 LAZY_INSTANCE_INITIALIZER;

305	311

306 // Converts one component of a host (between dots) to IDN if safe. The result	312 // Converts one component of a host (between dots) to IDN if safe. The result

307 // will be APPENDED to the given output string and will be the same as the input	313 // will be APPENDED to the given output string and will be the same as the input

308 // if it is not IDN or the IDN is unsafe to display. Returns whether any	314 // if it is not IDN or the IDN is unsafe to display. Returns whether any

309 // conversion was performed.	315 // conversion was performed.

310 bool IDNToUnicodeOneComponent(const base::char16* comp,	316 bool IDNToUnicodeOneComponent(const base::char16* comp,

311 size_t comp_len,	317 size_t comp_len,

312 const std::string& languages,	318 const std::string& languages,

313 base::string16* out) {	319 base::string16* out) {

314 DCHECK(out);	320 DCHECK(out);

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
382 component_start = component_end + 1) {	388 component_start = component_end + 1) {

383 // Find the end of the component.	389 // Find the end of the component.

384 component_end = input16.find('.', component_start);	390 component_end = input16.find('.', component_start);

385 if (component_end == base::string16::npos)	391 if (component_end == base::string16::npos)

386 component_end = input16.length(); // For getting the last component.	392 component_end = input16.length(); // For getting the last component.

387 size_t component_length = component_end - component_start;	393 size_t component_length = component_end - component_start;

388 size_t new_component_start = out16.length();	394 size_t new_component_start = out16.length();

389 bool converted_idn = false;	395 bool converted_idn = false;

390 if (component_end > component_start) {	396 if (component_end > component_start) {

391 // Add the substring that we just found.	397 // Add the substring that we just found.

392 converted_idn = IDNToUnicodeOneComponent(	398 converted_idn =

393 input16.data() + component_start, component_length, languages,	399 IDNToUnicodeOneComponent(input16.data() + component_start,

394 &out16);	400 component_length, languages, &out16);

395 }	401 }

396 size_t new_component_length = out16.length() - new_component_start;	402 size_t new_component_length = out16.length() - new_component_start;

397	403

398 if (converted_idn && adjustments) {	404 if (converted_idn && adjustments) {

399 adjustments->push_back(base::OffsetAdjuster::Adjustment(	405 adjustments->push_back(base::OffsetAdjuster::Adjustment(

400 component_start, component_length, new_component_length));	406 component_start, component_length, new_component_length));

401 }	407 }

402	408

403 // Need to add the dot we just found (if we found one).	409 // Need to add the dot we just found (if we found one).

404 if (component_end < input16.length())	410 if (component_end < input16.length())

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
437 url::Parsed* new_parsed,	443 url::Parsed* new_parsed,

438 size_t* prefix_end,	444 size_t* prefix_end,

439 base::OffsetAdjuster::Adjustments* adjustments) {	445 base::OffsetAdjuster::Adjustments* adjustments) {

440 DCHECK(new_parsed);	446 DCHECK(new_parsed);

441 const char kViewSource[] = "view-source:";	447 const char kViewSource[] = "view-source:";

442 const size_t kViewSourceLength = arraysize(kViewSource) - 1;	448 const size_t kViewSourceLength = arraysize(kViewSource) - 1;

443	449

444 // Format the underlying URL and record adjustments.	450 // Format the underlying URL and record adjustments.

445 const std::string& url_str(url.possibly_invalid_spec());	451 const std::string& url_str(url.possibly_invalid_spec());

446 adjustments->clear();	452 adjustments->clear();

447 base::string16 result(base::ASCIIToUTF16(kViewSource) +	453 base::string16 result(

	454 base::ASCIIToUTF16(kViewSource) +

448 FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)),	455 FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)),

449 languages, format_types, unescape_rules,	456 languages, format_types, unescape_rules,

450 new_parsed, prefix_end, adjustments));	457 new_parsed, prefix_end, adjustments));

451 // Revise \|adjustments\| by shifting to the offsets to prefix that the above	458 // Revise \|adjustments\| by shifting to the offsets to prefix that the above

452 // call to FormatUrl didn't get to see.	459 // call to FormatUrl didn't get to see.

453 for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin();	460 for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin();

454 it != adjustments->end(); ++it)	461 it != adjustments->end(); ++it)

455 it->original_offset += kViewSourceLength;	462 it->original_offset += kViewSourceLength;

456	463

457 // Adjust positions of the parsed components.	464 // Adjust positions of the parsed components.

(...skipping 22 matching lines...) Expand all Loading...
480 base::OffsetAdjuster::Adjustments* adjustments) const = 0;	487 base::OffsetAdjuster::Adjustments* adjustments) const = 0;

481	488

482 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an	489 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an

483 // accessible copy constructor in order to call AppendFormattedComponent()	490 // accessible copy constructor in order to call AppendFormattedComponent()

484 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).	491 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).

485 };	492 };

486	493

487 class HostComponentTransform : public AppendComponentTransform {	494 class HostComponentTransform : public AppendComponentTransform {

488 public:	495 public:

489 explicit HostComponentTransform(const std::string& languages)	496 explicit HostComponentTransform(const std::string& languages)

490 : languages_(languages) {	497 : languages_(languages) {}

491 }

492	498

493 private:	499 private:

494 base::string16 Execute(	500 base::string16 Execute(

495 const std::string& component_text,	501 const std::string& component_text,

496 base::OffsetAdjuster::Adjustments* adjustments) const override {	502 base::OffsetAdjuster::Adjustments* adjustments) const override {

497 return IDNToUnicodeWithAdjustments(component_text, languages_,	503 return IDNToUnicodeWithAdjustments(component_text, languages_, adjustments);

498 adjustments);

499 }	504 }

500	505

501 const std::string& languages_;	506 const std::string& languages_;

502 };	507 };

503	508

504 class NonHostComponentTransform : public AppendComponentTransform {	509 class NonHostComponentTransform : public AppendComponentTransform {

505 public:	510 public:

506 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules)	511 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules)

507 : unescape_rules_(unescape_rules) {	512 : unescape_rules_(unescape_rules) {}

508 }

509	513

510 private:	514 private:

511 base::string16 Execute(	515 base::string16 Execute(

512 const std::string& component_text,	516 const std::string& component_text,

513 base::OffsetAdjuster::Adjustments* adjustments) const override {	517 base::OffsetAdjuster::Adjustments* adjustments) const override {

514 return (unescape_rules_ == UnescapeRule::NONE) ?	518 return (unescape_rules_ == UnescapeRule::NONE)

515 base::UTF8ToUTF16WithAdjustments(component_text, adjustments) :	519 ? base::UTF8ToUTF16WithAdjustments(component_text, adjustments)

516 UnescapeAndDecodeUTF8URLComponentWithAdjustments(component_text,	520 : UnescapeAndDecodeUTF8URLComponentWithAdjustments(

517 unescape_rules_, adjustments);	521 component_text, unescape_rules_, adjustments);

518 }	522 }

519	523

520 const UnescapeRule::Type unescape_rules_;	524 const UnescapeRule::Type unescape_rules_;

521 };	525 };

522	526

523 // Transforms the portion of \|spec\| covered by \|original_component\| according to	527 // Transforms the portion of \|spec\| covered by \|original_component\| according to

524 // \|transform\|. Appends the result to \|output\|. If \|output_component\| is	528 // \|transform\|. Appends the result to \|output\|. If \|output_component\| is

525 // non-NULL, its start and length are set to the transformed component's new	529 // non-NULL, its start and length are set to the transformed component's new

526 // start and length. If \|adjustments\| is non-NULL, appends adjustments (if	530 // start and length. If \|adjustments\| is non-NULL, appends adjustments (if

527 // any) that reflect the transformation the original component underwent to	531 // any) that reflect the transformation the original component underwent to

(...skipping 13 matching lines...) Expand all Loading...
541 static_cast<size_t>(original_component.len));	545 static_cast<size_t>(original_component.len));

542	546

543 // Transform \|component_str\| and modify \|adjustments\| appropriately.	547 // Transform \|component_str\| and modify \|adjustments\| appropriately.

544 base::OffsetAdjuster::Adjustments component_transform_adjustments;	548 base::OffsetAdjuster::Adjustments component_transform_adjustments;

545 output->append(	549 output->append(

546 transform.Execute(component_str, &component_transform_adjustments));	550 transform.Execute(component_str, &component_transform_adjustments));

547	551

548 // Shift all the adjustments made for this component so the offsets are	552 // Shift all the adjustments made for this component so the offsets are

549 // valid for the original string and add them to \|adjustments\|.	553 // valid for the original string and add them to \|adjustments\|.

550 for (base::OffsetAdjuster::Adjustments::iterator comp_iter =	554 for (base::OffsetAdjuster::Adjustments::iterator comp_iter =

551 component_transform_adjustments.begin();	555 component_transform_adjustments.begin();

552 comp_iter != component_transform_adjustments.end(); ++comp_iter)	556 comp_iter != component_transform_adjustments.end(); ++comp_iter)

553 comp_iter->original_offset += original_component_begin;	557 comp_iter->original_offset += original_component_begin;

554 if (adjustments) {	558 if (adjustments) {

555 adjustments->insert(adjustments->end(),	559 adjustments->insert(adjustments->end(),

556 component_transform_adjustments.begin(),	560 component_transform_adjustments.begin(),

557 component_transform_adjustments.end());	561 component_transform_adjustments.end());

558 }	562 }

559	563

560 // Set positions of the parsed component.	564 // Set positions of the parsed component.

561 if (output_component) {	565 if (output_component) {

562 output_component->begin = static_cast<int>(output_component_begin);	566 output_component->begin = static_cast<int>(output_component_begin);

563 output_component->len =	567 output_component->len =

564 static_cast<int>(output->length() - output_component_begin);	568 static_cast<int>(output->length() - output_component_begin);

565 }	569 }

566 } else if (output_component) {	570 } else if (output_component) {

567 output_component->reset();	571 output_component->reset();

568 }	572 }

569 }	573 }

570	574

571 } // namespace	575 } // namespace

572	576

573 const FormatUrlType kFormatUrlOmitNothing = 0;	577 const FormatUrlType kFormatUrlOmitNothing = 0;

574 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;	578 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;

575 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;	579 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;

576 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;	580 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;

577 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword \|	581 const FormatUrlType kFormatUrlOmitAll =

578 kFormatUrlOmitHTTP \| kFormatUrlOmitTrailingSlashOnBareHostname;	582 kFormatUrlOmitUsernamePassword \| kFormatUrlOmitHTTP \|

	583 kFormatUrlOmitTrailingSlashOnBareHostname;

579	584

580 base::string16 IDNToUnicode(const std::string& host,	585 base::string16 IDNToUnicode(const std::string& host,

581 const std::string& languages) {	586 const std::string& languages) {

582 return IDNToUnicodeWithAdjustments(host, languages, NULL);	587 return IDNToUnicodeWithAdjustments(host, languages, NULL);

583 }	588 }

584	589

585 std::string GetDirectoryListingEntry(const base::string16& name,	590 std::string GetDirectoryListingEntry(const base::string16& name,

586 const std::string& raw_bytes,	591 const std::string& raw_bytes,

587 bool is_dir,	592 bool is_dir,

588 int64 size,	593 int64 size,

(...skipping 29 matching lines...) Expand all Loading...
618 base::EscapeJSONString(modified_str, true, &result);	623 base::EscapeJSONString(modified_str, true, &result);

619	624

620 result.append(");</script>\n");	625 result.append(");</script>\n");

621	626

622 return result;	627 return result;

623 }	628 }

624	629

625 void AppendFormattedHost(const GURL& url,	630 void AppendFormattedHost(const GURL& url,

626 const std::string& languages,	631 const std::string& languages,

627 base::string16* output) {	632 base::string16* output) {

628 AppendFormattedComponent(url.possibly_invalid_spec(),	633 AppendFormattedComponent(

629 url.parsed_for_possibly_invalid_spec().host,	634 url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host,

630 HostComponentTransform(languages), output, NULL, NULL);	635 HostComponentTransform(languages), output, NULL, NULL);

631 }	636 }

632	637

633 base::string16 FormatUrlWithOffsets(	638 base::string16 FormatUrlWithOffsets(

634 const GURL& url,	639 const GURL& url,

635 const std::string& languages,	640 const std::string& languages,

636 FormatUrlTypes format_types,	641 FormatUrlTypes format_types,

637 UnescapeRule::Type unescape_rules,	642 UnescapeRule::Type unescape_rules,

638 url::Parsed* new_parsed,	643 url::Parsed* new_parsed,

639 size_t* prefix_end,	644 size_t* prefix_end,

640 std::vector<size_t>* offsets_for_adjustment) {	645 std::vector<size_t>* offsets_for_adjustment) {

641 base::OffsetAdjuster::Adjustments adjustments;	646 base::OffsetAdjuster::Adjustments adjustments;

642 const base::string16& format_url_return_value =	647 const base::string16& format_url_return_value =

643 FormatUrlWithAdjustments(url, languages, format_types, unescape_rules,	648 FormatUrlWithAdjustments(url, languages, format_types, unescape_rules,

644 new_parsed, prefix_end, &adjustments);	649 new_parsed, prefix_end, &adjustments);

645 base::OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);	650 base::OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);

646 if (offsets_for_adjustment) {	651 if (offsets_for_adjustment) {

647 std::for_each(	652 std::for_each(

648 offsets_for_adjustment->begin(),	653 offsets_for_adjustment->begin(), offsets_for_adjustment->end(),

649 offsets_for_adjustment->end(),

650 base::LimitOffset<std::string>(format_url_return_value.length()));	654 base::LimitOffset<std::string>(format_url_return_value.length()));

651 }	655 }

652 return format_url_return_value;	656 return format_url_return_value;

653 }	657 }

654	658

655 base::string16 FormatUrlWithAdjustments(	659 base::string16 FormatUrlWithAdjustments(

656 const GURL& url,	660 const GURL& url,

657 const std::string& languages,	661 const std::string& languages,

658 FormatUrlTypes format_types,	662 FormatUrlTypes format_types,

659 UnescapeRule::Type unescape_rules,	663 UnescapeRule::Type unescape_rules,

660 url::Parsed* new_parsed,	664 url::Parsed* new_parsed,

661 size_t* prefix_end,	665 size_t* prefix_end,

662 base::OffsetAdjuster::Adjustments* adjustments) {	666 base::OffsetAdjuster::Adjustments* adjustments) {

663 DCHECK(adjustments != NULL);	667 DCHECK(adjustments != NULL);

664 adjustments->clear();	668 adjustments->clear();

665 url::Parsed parsed_temp;	669 url::Parsed parsed_temp;

666 if (!new_parsed)	670 if (!new_parsed)

667 new_parsed = &parsed_temp;	671 new_parsed = &parsed_temp;

668 else	672 else

669 *new_parsed = url::Parsed();	673 *new_parsed = url::Parsed();

670	674

671 // Special handling for view-source:. Don't use content::kViewSourceScheme	675 // Special handling for view-source:. Don't use content::kViewSourceScheme

672 // because this library shouldn't depend on chrome.	676 // because this library shouldn't depend on chrome.

673 const char* const kViewSource = "view-source";	677 const char* const kViewSource = "view-source";

674 // Reject "view-source:view-source:..." to avoid deep recursion.	678 // Reject "view-source:view-source:..." to avoid deep recursion.

675 const char* const kViewSourceTwice = "view-source:view-source:";	679 const char* const kViewSourceTwice = "view-source:view-source:";

676 if (url.SchemeIs(kViewSource) &&	680 if (url.SchemeIs(kViewSource) &&

677 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {	681 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {

678 return FormatViewSourceUrl(url, languages, format_types,	682 return FormatViewSourceUrl(url, languages, format_types, unescape_rules,

679 unescape_rules, new_parsed, prefix_end,	683 new_parsed, prefix_end, adjustments);

680 adjustments);

681 }	684 }

682	685

683 // We handle both valid and invalid URLs (this will give us the spec	686 // We handle both valid and invalid URLs (this will give us the spec

684 // regardless of validity).	687 // regardless of validity).

685 const std::string& spec = url.possibly_invalid_spec();	688 const std::string& spec = url.possibly_invalid_spec();

686 const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec();	689 const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec();

687	690

688 // Scheme & separators. These are ASCII.	691 // Scheme & separators. These are ASCII.

689 base::string16 url_string;	692 base::string16 url_string;

690 url_string.insert(	693 url_string.insert(

691 url_string.end(), spec.begin(),	694 url_string.end(), spec.begin(),

692 spec.begin() + parsed.CountCharactersBefore(url::Parsed::USERNAME, true));	695 spec.begin() + parsed.CountCharactersBefore(url::Parsed::USERNAME, true));

693 const char kHTTP[] = "http://";	696 const char kHTTP[] = "http://";

694 const char kFTP[] = "ftp.";	697 const char kFTP[] = "ftp.";

695 // url_fixer::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This	698 // url_fixer::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This

696 // means that if we trim "http://" off a URL whose host starts with "ftp." and	699 // means that if we trim "http://" off a URL whose host starts with "ftp." and

697 // the user inputs this into any field subject to fixup (which is basically	700 // the user inputs this into any field subject to fixup (which is basically

698 // all input fields), the meaning would be changed. (In fact, often the	701 // all input fields), the meaning would be changed. (In fact, often the

699 // formatted URL is directly pre-filled into an input field.) For this reason	702 // formatted URL is directly pre-filled into an input field.) For this reason

700 // we avoid stripping "http://" in this case.	703 // we avoid stripping "http://" in this case.

701 bool omit_http = (format_types & kFormatUrlOmitHTTP) &&	704 bool omit_http = (format_types & kFormatUrlOmitHTTP) &&

702 EqualsASCII(url_string, kHTTP) &&	705 EqualsASCII(url_string, kHTTP) &&

703 !StartsWithASCII(url.host(), kFTP, true);	706 !StartsWithASCII(url.host(), kFTP, true);

704 new_parsed->scheme = parsed.scheme;	707 new_parsed->scheme = parsed.scheme;

705	708

706 // Username & password.	709 // Username & password.

707 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {	710 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {

708 // Remove the username and password fields. We don't want to display those	711 // Remove the username and password fields. We don't want to display those

709 // to the user since they can be used for attacks,	712 // to the user since they can be used for attacks,

710 // e.g. "http://google.com:search@evil.ru/"	713 // e.g. "http://google.com:search@evil.ru/"

711 new_parsed->username.reset();	714 new_parsed->username.reset();

712 new_parsed->password.reset();	715 new_parsed->password.reset();

713 // Update the adjustments based on removed username and/or password.	716 // Update the adjustments based on removed username and/or password.

714 if (parsed.username.is_nonempty() \|\| parsed.password.is_nonempty()) {	717 if (parsed.username.is_nonempty() \|\| parsed.password.is_nonempty()) {

715 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {	718 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {

716 // The seeming off-by-two is to account for the ':' after the username	719 // The seeming off-by-two is to account for the ':' after the username

717 // and '@' after the password.	720 // and '@' after the password.

718 adjustments->push_back(base::OffsetAdjuster::Adjustment(	721 adjustments->push_back(base::OffsetAdjuster::Adjustment(

719 static_cast<size_t>(parsed.username.begin),	722 static_cast<size_t>(parsed.username.begin),

720 static_cast<size_t>(parsed.username.len + parsed.password.len + 2),	723 static_cast<size_t>(parsed.username.len + parsed.password.len + 2),

721 0));	724 0));

722 } else {	725 } else {

723 const url::Component* nonempty_component =	726 const url::Component* nonempty_component =

724 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;	727 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;

725 // The seeming off-by-one is to account for the '@' after the	728 // The seeming off-by-one is to account for the '@' after the

726 // username/password.	729 // username/password.

727 adjustments->push_back(base::OffsetAdjuster::Adjustment(	730 adjustments->push_back(base::OffsetAdjuster::Adjustment(

728 static_cast<size_t>(nonempty_component->begin),	731 static_cast<size_t>(nonempty_component->begin),

729 static_cast<size_t>(nonempty_component->len + 1),	732 static_cast<size_t>(nonempty_component->len + 1), 0));

730 0));

731 }	733 }

732 }	734 }

733 } else {	735 } else {

734 AppendFormattedComponent(spec, parsed.username,	736 AppendFormattedComponent(spec, parsed.username,

735 NonHostComponentTransform(unescape_rules),	737 NonHostComponentTransform(unescape_rules),

736 &url_string, &new_parsed->username, adjustments);	738 &url_string, &new_parsed->username, adjustments);

737 if (parsed.password.is_valid())	739 if (parsed.password.is_valid())

738 url_string.push_back(':');	740 url_string.push_back(':');

739 AppendFormattedComponent(spec, parsed.password,	741 AppendFormattedComponent(spec, parsed.password,

740 NonHostComponentTransform(unescape_rules),	742 NonHostComponentTransform(unescape_rules),

741 &url_string, &new_parsed->password, adjustments);	743 &url_string, &new_parsed->password, adjustments);

742 if (parsed.username.is_valid() \|\| parsed.password.is_valid())	744 if (parsed.username.is_valid() \|\| parsed.password.is_valid())

743 url_string.push_back('@');	745 url_string.push_back('@');

744 }	746 }

745 if (prefix_end)	747 if (prefix_end)

746 *prefix_end = static_cast<size_t>(url_string.length());	748 *prefix_end = static_cast<size_t>(url_string.length());

747	749

748 // Host.	750 // Host.

749 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(languages),	751 AppendFormattedComponent(spec, parsed.host, HostComponentTransform(languages),

750 &url_string, &new_parsed->host, adjustments);	752 &url_string, &new_parsed->host, adjustments);

751	753

752 // Port.	754 // Port.

753 if (parsed.port.is_nonempty()) {	755 if (parsed.port.is_nonempty()) {

754 url_string.push_back(':');	756 url_string.push_back(':');

755 new_parsed->port.begin = url_string.length();	757 new_parsed->port.begin = url_string.length();

756 url_string.insert(url_string.end(),	758 url_string.insert(url_string.end(), spec.begin() + parsed.port.begin,

757 spec.begin() + parsed.port.begin,

758 spec.begin() + parsed.port.end());	759 spec.begin() + parsed.port.end());

759 new_parsed->port.len = url_string.length() - new_parsed->port.begin;	760 new_parsed->port.len = url_string.length() - new_parsed->port.begin;

760 } else {	761 } else {

761 new_parsed->port.reset();	762 new_parsed->port.reset();

762 }	763 }

763	764

764 // Path & query. Both get the same general unescape & convert treatment.	765 // Path & query. Both get the same general unescape & convert treatment.

765 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) \|\|	766 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) \|\|

766 !CanStripTrailingSlash(url)) {	767 !CanStripTrailingSlash(url)) {

767 AppendFormattedComponent(spec, parsed.path,	768 AppendFormattedComponent(spec, parsed.path,

(...skipping 20 matching lines...) Expand all Loading...
788	789

789 // If we need to strip out http do it after the fact.	790 // If we need to strip out http do it after the fact.

790 if (omit_http && StartsWith(url_string, base::ASCIIToUTF16(kHTTP), true)) {	791 if (omit_http && StartsWith(url_string, base::ASCIIToUTF16(kHTTP), true)) {

791 const size_t kHTTPSize = arraysize(kHTTP) - 1;	792 const size_t kHTTPSize = arraysize(kHTTP) - 1;

792 url_string = url_string.substr(kHTTPSize);	793 url_string = url_string.substr(kHTTPSize);

793 // Because offsets in the \|adjustments\| are already calculated with respect	794 // Because offsets in the \|adjustments\| are already calculated with respect

794 // to the string with the http:// prefix in it, those offsets remain correct	795 // to the string with the http:// prefix in it, those offsets remain correct

795 // after stripping the prefix. The only thing necessary is to add an	796 // after stripping the prefix. The only thing necessary is to add an

796 // adjustment to reflect the stripped prefix.	797 // adjustment to reflect the stripped prefix.

797 adjustments->insert(adjustments->begin(),	798 adjustments->insert(adjustments->begin(),

798 base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0));	799 base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0));

799	800

800 if (prefix_end)	801 if (prefix_end)

801 *prefix_end -= kHTTPSize;	802 *prefix_end -= kHTTPSize;

802	803

803 // Adjust new_parsed.	804 // Adjust new_parsed.

804 DCHECK(new_parsed->scheme.is_valid());	805 DCHECK(new_parsed->scheme.is_valid());

805 int delta = -(new_parsed->scheme.len + 3); // +3 for ://.	806 int delta = -(new_parsed->scheme.len + 3); // +3 for ://.

806 new_parsed->scheme.reset();	807 new_parsed->scheme.reset();

807 AdjustAllComponentsButScheme(delta, new_parsed);	808 AdjustAllComponentsButScheme(delta, new_parsed);

808 }	809 }

809	810

810 return url_string;	811 return url_string;

811 }	812 }

812	813

813 base::string16 FormatUrl(const GURL& url,	814 base::string16 FormatUrl(const GURL& url,

814 const std::string& languages,	815 const std::string& languages,

815 FormatUrlTypes format_types,	816 FormatUrlTypes format_types,

816 UnescapeRule::Type unescape_rules,	817 UnescapeRule::Type unescape_rules,

817 url::Parsed* new_parsed,	818 url::Parsed* new_parsed,

818 size_t* prefix_end,	819 size_t* prefix_end,

819 size_t* offset_for_adjustment) {	820 size_t* offset_for_adjustment) {

820 Offsets offsets;	821 Offsets offsets;

821 if (offset_for_adjustment)	822 if (offset_for_adjustment)

822 offsets.push_back(*offset_for_adjustment);	823 offsets.push_back(*offset_for_adjustment);

823 base::string16 result = FormatUrlWithOffsets(url, languages, format_types,	824 base::string16 result =

824 unescape_rules, new_parsed, prefix_end, &offsets);	825 FormatUrlWithOffsets(url, languages, format_types, unescape_rules,

	826 new_parsed, prefix_end, &offsets);

825 if (offset_for_adjustment)	827 if (offset_for_adjustment)

826 *offset_for_adjustment = offsets[0];	828 *offset_for_adjustment = offsets[0];

827 return result;	829 return result;

828 }	830 }

829	831

830 } // namespace net	832 } // namespace net

OLD	NEW

« net/base/net_log_util.cc ('K') | « net/base/net_util.cc ('k') | net/base/net_util_icu_unittest.cc » ('j') | net/base/net_util_icu_unittest.cc » ('J')