Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(334)

Side by Side Diff: net/base/escape.cc

Issue 664803003: Update from chromium a8e7c94b1b79a0948d05a1fcfff53391d22ce37a (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/escape.h" 5 #include "net/base/escape.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 const typename STR::value_type least_sig_digit( 113 const typename STR::value_type least_sig_digit(
114 static_cast<typename STR::value_type>(escaped_text[index + 2])); 114 static_cast<typename STR::value_type>(escaped_text[index + 2]));
115 if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) { 115 if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) {
116 *value = HexDigitToInt(most_sig_digit) * 16 + 116 *value = HexDigitToInt(most_sig_digit) * 16 +
117 HexDigitToInt(least_sig_digit); 117 HexDigitToInt(least_sig_digit);
118 return true; 118 return true;
119 } 119 }
120 return false; 120 return false;
121 } 121 }
122 122
123 // Returns true if the escapes starting at |index| encode U+061C ARABIC LETTER MARK (%D8%9C). |first_byte|
124 // is the decoded byte value of the escape at |index|; the second byte is decoded here from the escape at |index| + 3.
125 template<typename STR>
126 bool HasArabicLanguageMarkAtIndex(const STR& escaped_text,
127 unsigned char first_byte,
128 size_t index) {
129 if (first_byte != 0xD8)  // Not the lead byte of the %D8%9C sequence.
130 return false;
131 unsigned char second_byte;
132 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))  // No decodable escape follows the first one.
133 return false;
134 return second_byte == 0x9c;
135 }
136
137 // Returns true if the three escapes starting at |index| encode a three-byte BiDi control char: %E2%80%8E, %E2%80%8F, %E2%80%AA..%E2%80%AE or %E2%81%A6..%E2%81%A9. |first_byte| is the
138 // decoded byte value of the escape at |index|; the second and third bytes are decoded here from the escapes at |index| + 3 and |index| + 6.
139 template<typename STR>
140 bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text,
141 unsigned char first_byte,
142 size_t index) {
143 if (first_byte != 0xE2)  // Every sequence matched here starts with %E2.
144 return false;
145 unsigned char second_byte;
146 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))
147 return false;
148 if (second_byte != 0x80 && second_byte != 0x81)
149 return false;
150 unsigned char third_byte;
151 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))
152 return false;
153 if (second_byte == 0x80) {  // %E2%80%xx: U+200E, U+200F and U+202A..U+202E.
154 return third_byte == 0x8E ||
155 third_byte == 0x8F ||
156 (third_byte >= 0xAA && third_byte <= 0xAE);
157 }
158 return third_byte >= 0xA6 && third_byte <= 0xA9;  // %E2%81%xx: U+2066..U+2069.
159 }
160
123 // Unescapes |escaped_text| according to |rules|, returning the resulting 161 // Unescapes |escaped_text| according to |rules|, returning the resulting
124 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects 162 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects
125 // the alterations done to the string that are not one-character-to-one- 163 // the alterations done to the string that are not one-character-to-one-
126 // character. The resulting |adjustments| will always be sorted by increasing 164 // character. The resulting |adjustments| will always be sorted by increasing
127 // offset. 165 // offset.
128 template<typename STR> 166 template<typename STR>
129 STR UnescapeURLWithAdjustmentsImpl( 167 STR UnescapeURLWithAdjustmentsImpl(
130 const STR& escaped_text, 168 const STR& escaped_text,
131 UnescapeRule::Type rules, 169 UnescapeRule::Type rules,
132 base::OffsetAdjuster::Adjustments* adjustments) { 170 base::OffsetAdjuster::Adjustments* adjustments) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
165 // 203 //
166 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC 204 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC
167 // 3987 above has since added some new BiDi control characters. 205 // 3987 above has since added some new BiDi control characters.
168 // http://www.unicode.org/reports/tr9 206 // http://www.unicode.org/reports/tr9
169 // 207 //
170 // U+061C ARABIC LETTER MARK (%D8%9C) 208 // U+061C ARABIC LETTER MARK (%D8%9C)
171 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) 209 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)
172 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) 210 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)
173 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) 211 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)
174 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) 212 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)
175 213 //
176 unsigned char second_byte; 214 // However, some schemes such as data: and file: need to parse the exact
177 // Check for ALM. 215 // binary data when loading the URL. For that reason, CONTROL_CHARS allows
178 if ((first_byte == 0xD8) && 216 // unescaping BiDi control characters.
179 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && 217 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed
180 (second_byte == 0x9c)) { 218 // in the UI.
181 result.append(escaped_text, i, 6); 219 if (!(rules & UnescapeRule::CONTROL_CHARS)) {
182 i += 5; 220 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {
183 continue; 221 // Keep Arabic Language Mark escaped.
184 } 222 result.append(escaped_text, i, 6);
185 223 i += 5;
186 // Check for other BiDi control characters. 224 continue;
187 if ((first_byte == 0xE2) && 225 }
188 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && 226 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {
189 ((second_byte == 0x80) || (second_byte == 0x81))) { 227 // Keep BiDi control char escaped.
190 unsigned char third_byte;
191 if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) &&
192 ((second_byte == 0x80) ?
193 ((third_byte == 0x8E) || (third_byte == 0x8F) ||
194 ((third_byte >= 0xAA) && (third_byte <= 0xAE))) :
195 ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) {
196 result.append(escaped_text, i, 9); 228 result.append(escaped_text, i, 9);
197 i += 8; 229 i += 8;
198 continue; 230 continue;
199 } 231 }
200 } 232 }
201 233
202 if (first_byte >= 0x80 || // Unescape all high-bit characters. 234 if (first_byte >= 0x80 || // Unescape all high-bit characters.
203 // For 7-bit characters, the lookup table tells us all valid chars. 235 // For 7-bit characters, the lookup table tells us all valid chars.
204 (kUrlUnescape[first_byte] || 236 (kUrlUnescape[first_byte] ||
205 // ...and we allow some additional unescaping when flags are set. 237 // ...and we allow some additional unescaping when flags are set.
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after
402 1, kEscapeToChars[i].replacement); 434 1, kEscapeToChars[i].replacement);
403 break; 435 break;
404 } 436 }
405 } 437 }
406 } 438 }
407 } 439 }
408 return text; 440 return text;
409 } 441 }
410 442
411 } // namespace net 443 } // namespace net
OLDNEW
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698