src/uri.cc - Issue 1994733003: Rewrite decodeURL as builtin function, remove now unused runtime functions.

Side by Side Diff: src/uri.cc

Issue 1994733003: Rewrite decodeURL as builtin function, remove now unused runtime functions. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Move new test 612109.js to regress/ Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/uri.h"	5 #include "src/uri.h"

6	6

7 #include "src/char-predicates-inl.h"	7 #include "src/char-predicates-inl.h"

8 #include "src/handles.h"	8 #include "src/handles.h"

9 #include "src/isolate-inl.h"	9 #include "src/isolate-inl.h"

10 #include "src/list.h"	10 #include "src/list.h"

(...skipping 42 matching lines...)
53 }	53 }

54 }	54 }

55	55

56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {	56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {

57 buffer->Add('%');	57 buffer->Add('%');

58 buffer->Add(HexCharOfValue(octet >> 4));	58 buffer->Add(HexCharOfValue(octet >> 4));

59 buffer->Add(HexCharOfValue(octet & 0x0F));	59 buffer->Add(HexCharOfValue(octet & 0x0F));

60 }	60 }

61	61

62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) {	62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) {

63 uint8_t x = (c >> 12) & 0xF;	63 char s[4];

64 uint8_t y = (c >> 6) & 63;	64 int number_of_bytes;

65 uint8_t z = c & 63;	65 number_of_bytes =

66 if (c <= 0x007F) {	66 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);

67 AddHexEncodedToBuffer(c, buffer);	67 for (int k = 0; k < number_of_bytes; k++) {

68 } else if (c <= 0x07FF) {	68 AddHexEncodedToBuffer(s[k], buffer);

69 AddHexEncodedToBuffer(y + 192, buffer);

70 AddHexEncodedToBuffer(z + 128, buffer);

71 } else {

72 AddHexEncodedToBuffer(x + 224, buffer);

73 AddHexEncodedToBuffer(y + 128, buffer);

74 AddHexEncodedToBuffer(z + 128, buffer);

75 }	69 }

76 }	70 }

77	71

78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {	72 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {

79 uint8_t u = ((cc1 >> 6) & 0xF) + 1;	73 char s[4];

80 uint8_t w = (cc1 >> 2) & 0xF;	74 int number_of_bytes =

81 uint8_t x = cc1 & 3;	75 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),

82 uint8_t y = (cc2 >> 6) & 0xF;	76 unibrow::Utf16::kNoPreviousCharacter, false);

83 uint8_t z = cc2 & 63;	77 for (int k = 0; k < number_of_bytes; k++) {

84 AddHexEncodedToBuffer((u >> 2) + 240, buffer);	78 AddHexEncodedToBuffer(s[k], buffer);

85 AddHexEncodedToBuffer((((u & 3) << 4) \| w) + 128, buffer);	79 }

86 AddHexEncodedToBuffer(((x << 4) \| y) + 128, buffer);

87 AddHexEncodedToBuffer(z + 128, buffer);

88 }	80 }

89	81

90 } // anonymous namespace	82 } // anonymous namespace

91	83

92 Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) {	84 MaybeHandle<Object> Uri::Encode(Isolate* isolate, Handle<String> uri,

	85 bool is_uri) {

93 uri = String::Flatten(uri);	86 uri = String::Flatten(uri);

94 int uri_length = uri->length();	87 int uri_length = uri->length();

95 List<uint8_t> buffer(uri_length);	88 List<uint8_t> buffer(uri_length);

96	89

97 {	90 {

98 DisallowHeapAllocation no_gc;	91 DisallowHeapAllocation no_gc;

99 String::FlatContent uri_content = uri->GetFlatContent();	92 String::FlatContent uri_content = uri->GetFlatContent();

100	93

101 for (int k = 0; k < uri_length; k++) {	94 for (int k = 0; k < uri_length; k++) {

102 uc16 cc1 = uri_content.Get(k);	95 uc16 cc1 = uri_content.Get(k);

(...skipping 10 matching lines...)
113 if (IsUnescapePredicateInUriComponent(cc1) \|\|	106 if (IsUnescapePredicateInUriComponent(cc1) \|\|

114 (is_uri && IsUriSeparator(cc1))) {	107 (is_uri && IsUriSeparator(cc1))) {

115 buffer.Add(cc1);	108 buffer.Add(cc1);

116 } else {	109 } else {

117 EncodeSingle(cc1, &buffer);	110 EncodeSingle(cc1, &buffer);

118 }	111 }

119 continue;	112 continue;

120 }	113 }

121	114

122 AllowHeapAllocation allocate_error_and_return;	115 AllowHeapAllocation allocate_error_and_return;

123 THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError());	116 THROW_NEW_ERROR(isolate, NewURIError(), Object);

124 }	117 }

125 }	118 }

126	119

127 Handle<String> result;	120 Handle<String> result;

128 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(	121 ASSIGN_RETURN_ON_EXCEPTION(

129 isolate, result,	122 isolate, result,

130 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()));	123 isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()), Object);

131 return *result;	124 return result;

	125 }

	126

	127 namespace { // anonymous namespace for DecodeURI helper functions

	128

	129 bool IsReservedPredicate(uc16 c) {

	130 switch (c) {

	131 case '#':

	132 case '$':

	133 case '&':

	134 case '+':

	135 case ',':

	136 case '/':

	137 case ':':

	138 case ';':

	139 case '=':

	140 case '?':

	141 case '@':

	142 return true;

	143 default:

	144 return false;

	145 }

	146 }

	147

	148 bool IsReplacementCharacter(const uint8_t* octets, int length) {

	149 // The replacement character is at codepoint U+FFFD in the Unicode Specials

	150 // table. Its UTF-8 encoding is 0xEF 0xBF 0xBD.

	151 if (length != 3 \|\| octets[0] != 0xef \|\| octets[1] != 0xbf \|\|

	152 octets[2] != 0xbd) {

	153 return false;

	154 }

	155 return true;

	156 }

	157

	158 bool DecodeOctets(const uint8_t* octets, int length,

	159 List<uc16>* two_byte_buffer) {

	160 size_t cursor = 0;

	161 uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor);

	162 if (value == unibrow::Utf8::kBadChar &&

	163 !IsReplacementCharacter(octets, length)) {

	164 return false;

	165 }

	166

	167 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) {

	168 two_byte_buffer->Add(value);

	169 } else {

	170 two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value));

	171 two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value));

	172 }

	173 return true;

	174 }

	175

	176 bool TwoDigitHex(uc16* decoded, int index, String::FlatContent* uri_content) {

	177 char high = HexValue(uri_content->Get(index + 1));

	178 char low = HexValue(uri_content->Get(index + 2));

	179 if (high < 0 \|\| low < 0) {

	180 return false;

	181 }

	182 *decoded = (high << 4) \| low;

	183 return true;

	184 }

	185

	186 template <typename T>

	187 bool AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index,

	188 bool is_uri, List<T>* buffer) {

	189 if (is_uri && IsReservedPredicate(decoded)) {

	190 buffer->Add('%');

	191 uc16 first = uri_content->Get(index + 1);

	192 uc16 second = uri_content->Get(index + 2);

	193 if (first > std::numeric_limits<T>::max() \|\|
	Yang 2016/05/23 11:24:59: Do you expect this to happen in some cases? Imo this must not happen at all, since you should have figured out the correct encoding in IntoOneAndTwoByte. Please make this a DCHECK. Franzi 2016/05/24 15:07:38 (in reply): You're right. Can never happen. Done.
	194 second > std::numeric_limits<T>::max()) {

	195 return false;

	196 }

	197 buffer->Add(first);

	198 buffer->Add(second);

	199 } else {

	200 buffer->Add(decoded);

	201 }

	202 return true;

	203 }

	204

	205 bool IntoTwoByte(int index, bool is_uri, int uri_length,

	206 String::FlatContent* uri_content,

	207 List<uc16>* two_byte_buffer) {

	208 for (int k = index; k < uri_length; k++) {

	209 uc16 code = uri_content->Get(k);

	210 if (code == '%') {

	211 uc16 decoded;

	212 if (k + 2 >= uri_length \|\| !TwoDigitHex(&decoded, k, uri_content)) {

	213 return false;

	214 }

	215 k += 2;

	216 if (decoded > unibrow::Utf8::kMaxOneByteChar) {

	217 int n = 1;

	218 while ((decoded << n) & 0x80) {
	Yang 2016/05/23 11:24:59: Actually, you could merge this while loop with the for loop below, right? I wonder whether there is performance impact through this. Franzi 2016/05/24 15:07:38 (in reply): Merged. Also renamed n to number_of_continuation_bytes and removed double checks (n==1 and k+2>length)
	219 n++;

	220 }

	221 if (n == 1 \|\| n > 4 \|\| k + 3 * (n - 1) >= uri_length) {

	222 return false;

	223 }

	224 uint8_t octets[4];

	225 int octet_length = 0;

	226 octets[octet_length++] = decoded;

	227

	228 for (int i = 1; i < n; i++) {

	229 uc16 decodedTrail;

	230

	231 if (uri_content->Get(++k) != '%' \|\| k + 2 >= uri_length \|\|

	232 !TwoDigitHex(&decodedTrail, k, uri_content)) {

	233 return false;

	234 }

	235 k += 2;

	236 octets[octet_length++] = decodedTrail;

	237 }

	238

	239 if (!DecodeOctets(octets, octet_length, two_byte_buffer)) {

	240 return false;

	241 }

	242 } else {

	243 if (!AddToBuffer(decoded, uri_content, k - 2, is_uri,

	244 two_byte_buffer)) {

	245 return false;

	246 }

	247 }

	248 } else {

	249 two_byte_buffer->Add(code);

	250 }

	251 }

	252 return true;

	253 }

	254

	255 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,

	256 List<uint8_t>* one_byte_buffer,

	257 List<uc16>* two_byte_buffer) {

	258 DisallowHeapAllocation no_gc;

	259 String::FlatContent uri_content = uri->GetFlatContent();

	260

	261 int uri_length = uri->length();

	262 for (int k = 0; k < uri_length; k++) {

	263 uc16 code = uri_content.Get(k);

	264 if (code == '%') {

	265 uc16 decoded;

	266 if (k + 2 >= uri_length \|\| !TwoDigitHex(&decoded, k, &uri_content)) {

	267 return false;

	268 }

	269

	270 if (decoded > unibrow::Utf8::kMaxOneByteChar) {

	271 return IntoTwoByte(k, is_uri, uri_length, &uri_content,

	272 two_byte_buffer);

	273 }

	274

	275 if (!AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer)) {

	276 return false;

	277 }

	278 k += 2;

	279 } else {

	280 if (code > unibrow::Utf8::kMaxOneByteChar) {

	281 return IntoTwoByte(k, is_uri, uri_length, &uri_content,

	282 two_byte_buffer);

	283 }

	284 one_byte_buffer->Add(code);

	285 }

	286 }

	287 return true;

	288 }

	289

	290 } // anonymous namespace

	291

	292 MaybeHandle<Object> Uri::Decode(Isolate* isolate, Handle<String> uri,

	293 bool is_uri) {

	294 uri = String::Flatten(uri);

	295 List<uint8_t> one_byte_buffer;

	296 List<uc16> two_byte_buffer;

	297

	298 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {

	299 THROW_NEW_ERROR(isolate, NewURIError(), Object);

	300 }

	301

	302 if (two_byte_buffer.is_empty()) {

	303 Handle<SeqOneByteString> result;

	304

	305 ASSIGN_RETURN_ON_EXCEPTION(

	306 isolate, result,

	307 isolate->factory()->NewRawOneByteString(one_byte_buffer.length()),
	Yang 2016/05/23 11:24:59: You can use NewStringFromOneByte here. Franzi 2016/05/24 15:07:38 (in reply): Done.
	308 Object);

	309 CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(),

	310 one_byte_buffer.length());

	311 return result;

	312 }

	313

	314 Handle<SeqTwoByteString> result;

	315 ASSIGN_RETURN_ON_EXCEPTION(

	316 isolate, result, isolate->factory()->NewRawTwoByteString(

	317 one_byte_buffer.length() + two_byte_buffer.length()),

	318 Object);

	319

	320 CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(),

	321 one_byte_buffer.length());

	322 CopyChars(result->GetChars() + one_byte_buffer.length(),

	323 two_byte_buffer.ToConstVector().start(), two_byte_buffer.length());

	324

	325 return result;

132 }	326 }

133	327

134 } // namespace internal	328 } // namespace internal

135 } // namespace v8	329 } // namespace v8

OLD	NEW

« no previous file with comments | « src/uri.h ('k') | test/cctest/compiler/test-run-intrinsics.cc » ('j') | no next file with comments »