net/base/escape.cc - Issue 664803003: Update from chromium a8e7c94b1b79a0948d05a1fcfff53391d22ce37a

Side by Side Diff: net/base/escape.cc

Issue 664803003: Update from chromium a8e7c94b1b79a0948d05a1fcfff53391d22ce37a (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/escape.h"	5 #include "net/base/escape.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8	8

9 #include "base/logging.h"	9 #include "base/logging.h"

10 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
113 const typename STR::value_type least_sig_digit(	113 const typename STR::value_type least_sig_digit(

114 static_cast<typename STR::value_type>(escaped_text[index + 2]));	114 static_cast<typename STR::value_type>(escaped_text[index + 2]));

115 if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) {	115 if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) {

116 *value = HexDigitToInt(most_sig_digit) * 16 +	116 *value = HexDigitToInt(most_sig_digit) * 16 +

117 HexDigitToInt(least_sig_digit);	117 HexDigitToInt(least_sig_digit);

118 return true;	118 return true;

119 }	119 }

120 return false;	120 return false;

121 }	121 }

122	122

	123 // Returns true if there is an Arabic Language Mark at |index|. |first_byte|

	124 // is the byte at |index|.

	125 template<typename STR>

	126 bool HasArabicLanguageMarkAtIndex(const STR& escaped_text,

	127 unsigned char first_byte,

	128 size_t index) {

	129 if (first_byte != 0xD8)

	130 return false;

	131 unsigned char second_byte;

	132 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))

	133 return false;

	134 return second_byte == 0x9c;

	135 }

	136

	137 // Returns true if there is a BiDi control char at |index|. |first_byte| is the

	138 // byte at |index|.

	139 template<typename STR>

	140 bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text,

	141 unsigned char first_byte,

	142 size_t index) {

	143 if (first_byte != 0xE2)

	144 return false;

	145 unsigned char second_byte;

	146 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))

	147 return false;

	148 if (second_byte != 0x80 && second_byte != 0x81)

	149 return false;

	150 unsigned char third_byte;

	151 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))

	152 return false;

	153 if (second_byte == 0x80) {

	154 return third_byte == 0x8E ||

	155 third_byte == 0x8F ||

	156 (third_byte >= 0xAA && third_byte <= 0xAE);

	157 }

	158 return third_byte >= 0xA6 && third_byte <= 0xA9;

	159 }

	160

123 // Unescapes |escaped_text| according to |rules|, returning the resulting	161 // Unescapes |escaped_text| according to |rules|, returning the resulting

124 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects	162 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects

125 // the alterations done to the string that are not one-character-to-one-	163 // the alterations done to the string that are not one-character-to-one-

126 // character. The resulting |adjustments| will always be sorted by increasing	164 // character. The resulting |adjustments| will always be sorted by increasing

127 // offset.	165 // offset.

128 template<typename STR>	166 template<typename STR>

129 STR UnescapeURLWithAdjustmentsImpl(	167 STR UnescapeURLWithAdjustmentsImpl(

130 const STR& escaped_text,	168 const STR& escaped_text,

131 UnescapeRule::Type rules,	169 UnescapeRule::Type rules,

132 base::OffsetAdjuster::Adjustments* adjustments) {	170 base::OffsetAdjuster::Adjustments* adjustments) {

(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
165 //	203 //

166 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC	204 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC

167 // 3987 above has since added some new BiDi control characters.	205 // 3987 above has since added some new BiDi control characters.

168 // http://www.unicode.org/reports/tr9	206 // http://www.unicode.org/reports/tr9

169 //	207 //

170 // U+061C ARABIC LETTER MARK (%D8%9C)	208 // U+061C ARABIC LETTER MARK (%D8%9C)

171 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)	209 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)

172 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)	210 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)

173 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)	211 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)

174 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)	212 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)

175	213 //

176 unsigned char second_byte;	214 // However, some schemes such as data: and file: need to parse the exact

177 // Check for ALM.	215 // binary data when loading the URL. For that reason, CONTROL_CHARS allows

178 if ((first_byte == 0xD8) &&	216 // unescaping BiDi control characters.

179 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&	217 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed

180 (second_byte == 0x9c)) {	218 // in the UI.

181 result.append(escaped_text, i, 6);	219 if (!(rules & UnescapeRule::CONTROL_CHARS)) {

182 i += 5;	220 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {

183 continue;	221 // Keep Arabic Language Mark escaped.

184 }	222 result.append(escaped_text, i, 6);

185	223 i += 5;

186 // Check for other BiDi control characters.	224 continue;

187 if ((first_byte == 0xE2) &&	225 }

188 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&	226 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {

189 ((second_byte == 0x80) || (second_byte == 0x81))) {	227 // Keep BiDi control char escaped.

190 unsigned char third_byte;

191 if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) &&

192 ((second_byte == 0x80) ?

193 ((third_byte == 0x8E) || (third_byte == 0x8F) ||

194 ((third_byte >= 0xAA) && (third_byte <= 0xAE))) :

195 ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) {

196 result.append(escaped_text, i, 9);	228 result.append(escaped_text, i, 9);

197 i += 8;	229 i += 8;

198 continue;	230 continue;

199 }	231 }

200 }	232 }

201	233

202 if (first_byte >= 0x80 || // Unescape all high-bit characters.	234 if (first_byte >= 0x80 || // Unescape all high-bit characters.

203 // For 7-bit characters, the lookup table tells us all valid chars.	235 // For 7-bit characters, the lookup table tells us all valid chars.

204 (kUrlUnescape[first_byte] ||	236 (kUrlUnescape[first_byte] ||

205 // ...and we allow some additional unescaping when flags are set.	237 // ...and we allow some additional unescaping when flags are set.

(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
402 1, kEscapeToChars[i].replacement);	434 1, kEscapeToChars[i].replacement);

403 break;	435 break;

404 }	436 }

405 }	437 }

406 }	438 }

407 }	439 }

408 return text;	440 return text;

409 }	441 }

410	442

411 } // namespace net	443 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »