src/uri.cc - Issue 1994733003: Rewrite decodeURL as builtin function, remove now unused runtime functions.

Side by Side Diff: src/uri.cc

Issue 1994733003: Rewrite decodeURL as builtin function, remove now unused runtime functions. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Rebase Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/uri.h"	5 #include "src/uri.h"

6	6

7 #include "src/char-predicates-inl.h"	7 #include "src/char-predicates-inl.h"

8 #include "src/handles.h"	8 #include "src/handles.h"

9 #include "src/isolate-inl.h"	9 #include "src/isolate-inl.h"

10 #include "src/list.h"	10 #include "src/list.h"

11	11

12 namespace v8 {	12 namespace v8 {

13 namespace internal {	13 namespace internal {

14	14

	15 namespace { // anonymous namespace for DecodeURI helper functions

	16 bool IsReservedPredicate(uc16 c) {

	17 switch (c) {

	18 case '#':

	19 case '$':

	20 case '&':

	21 case '+':

	22 case ',':

	23 case '/':

	24 case ':':

	25 case ';':

	26 case '=':

	27 case '?':

	28 case '@':

	29 return true;

	30 default:

	31 return false;

	32 }

	33 }

	34

	// Returns true only if |octets| holds exactly the three bytes 0xEF 0xBF 0xBD,
	// i.e. the UTF-8 encoding of U+FFFD (the replacement character in the
	// Unicode Specials table). |octets| is not read unless |length| is 3.
	bool IsReplacementCharacter(const uint8_t* octets, int length) {
	  if (length != 3) {
	    return false;
	  }
	  return octets[0] == 0xef && octets[1] == 0xbf && octets[2] == 0xbd;
	}

	44

	45 bool DecodeOctets(const uint8_t* octets, int length, List<uc16>* buffer) {

	46 size_t cursor = 0;

	47 uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor);

	48 if (value == unibrow::Utf8::kBadChar &&

	49 !IsReplacementCharacter(octets, length)) {

	50 return false;

	51 }

	52

	53 if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) {

	54 buffer->Add(value);

	55 } else {

	56 buffer->Add(unibrow::Utf16::LeadSurrogate(value));

	57 buffer->Add(unibrow::Utf16::TrailSurrogate(value));

	58 }

	59 return true;

	60 }

	61

	62 bool TwoDigitHex(int index, String::FlatContent* uri_content, uc16* decoded) {

	63 char high = HexValue(uri_content->Get(index + 1));

	64 char low = HexValue(uri_content->Get(index + 2));

	65 if (high < 0 \|\| low < 0) {

	66 return false;

	67 }

	68 *decoded = (high << 4) \| low;

	69 return true;

	70 }

	71

	72 template <typename T>

	73 void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index,

	74 bool is_uri, List<T>* buffer) {

	75 if (is_uri && IsReservedPredicate(decoded)) {

	76 buffer->Add('%');

	77 uc16 first = uri_content->Get(index + 1);

	78 uc16 second = uri_content->Get(index + 2);

	79 DCHECK_GT(std::numeric_limits<T>::max(), first);

	80 DCHECK_GT(std::numeric_limits<T>::max(), second);

	81

	82 buffer->Add(first);

	83 buffer->Add(second);

	84 } else {

	85 buffer->Add(decoded);

	86 }

	87 }

	88

	// Decodes |uri_content| from position |index| up to |uri_length|, appending
	// UTF-16 code units to |buffer|.
	//
	// Characters other than '%' are copied unchanged. A "%XX" escape whose value
	// is at most unibrow::Utf8::kMaxOneByteChar is handed to AddToBuffer. A
	// larger value is treated as the first octet of a multi-octet UTF-8 sequence
	// whose remaining octets must follow as further "%XX" escapes; the collected
	// octets are then passed to DecodeOctets.
	//
	// Returns false on a truncated or non-hex escape, on a missing continuation
	// escape, on a first octet with five or more leading 1 bits, or when
	// DecodeOctets rejects the octets. |buffer| may already hold partial output
	// in that case.
	89 bool IntoTwoByte(int index, bool is_uri, int uri_length,

	90 String::FlatContent* uri_content, List<uc16>* buffer) {

	91 for (int k = index; k < uri_length; k++) {

	92 uc16 code = uri_content->Get(k);

	93 if (code == '%') {

	94 uc16 decoded;

	// The escape needs two more characters after the '%' at k.
	95 if (k + 2 >= uri_length \|\| !TwoDigitHex(k, uri_content, &decoded)) {

	96 return false;

	97 }

	// k now sits on the last hex digit of the escape just parsed.
	98 k += 2;

	99 if (decoded > unibrow::Utf8::kMaxOneByteChar) {

	100 uint8_t octets[unibrow::Utf8::kMaxEncodedSize];

	101 octets[0] = decoded;

	102

	// Despite its name, this counter ends up holding the total number of
	// octets collected (first octet included): it is pre-incremented before
	// every test, so it is one past the number of completed iterations when
	// the loop exits.
	103 int number_of_continuation_bytes = 0;

	// Tests bit (7 - n) of the first octet for n = 1, 2, ...; each set bit
	// means one more "%XX" octet has to follow.
	104 while ((decoded << ++number_of_continuation_bytes) & 0x80) {

	// n > 3 means bits 6..3 of the first octet were all set. k + 3 must be
	// in range because the next escape occupies positions k+1 .. k+3.
	105 if (number_of_continuation_bytes > 3 \|\| k + 3 >= uri_length) {

	106 return false;

	107 }

	108

	109 uc16 continuation_byte;

	110

	// ++k steps onto the expected '%'; TwoDigitHex then reads k+1 and k+2.
	111 if (uri_content->Get(++k) != '%' \|\|

	112 !TwoDigitHex(k, uri_content, &continuation_byte)) {

	113 return false;

	114 }

	// Leave k on the last hex digit again for the next iteration.
	115 k += 2;

	116 octets[number_of_continuation_bytes] = continuation_byte;

	117 }

	118

	// If bit 6 of the first octet was clear the loop body never ran and a
	// single octet is passed here; the outcome is then up to DecodeOctets.
	119 if (!DecodeOctets(octets, number_of_continuation_bytes, buffer)) {

	120 return false;

	121 }

	122 } else {

	// k - 2 is the position of this escape's '%', which AddToBuffer needs in
	// order to copy the original digits for reserved characters.
	123 AddToBuffer(decoded, uri_content, k - 2, is_uri, buffer);

	124 }

	125 } else {

	126 buffer->Add(code);

	127 }

	128 }

	129 return true;

	130 }

	131

	132 bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,

	133 List<uint8_t>* one_byte_buffer,

	134 List<uc16>* two_byte_buffer) {

	135 DisallowHeapAllocation no_gc;

	136 String::FlatContent uri_content = uri->GetFlatContent();

	137

	138 int uri_length = uri->length();

	139 for (int k = 0; k < uri_length; k++) {

	140 uc16 code = uri_content.Get(k);

	141 if (code == '%') {

	142 uc16 decoded;

	143 if (k + 2 >= uri_length \|\| !TwoDigitHex(k, &uri_content, &decoded)) {

	144 return false;

	145 }

	146

	147 if (decoded > unibrow::Utf8::kMaxOneByteChar) {

	148 return IntoTwoByte(k, is_uri, uri_length, &uri_content,

	149 two_byte_buffer);

	150 }

	151

	152 AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer);

	153 k += 2;

	154 } else {

	155 if (code > unibrow::Utf8::kMaxOneByteChar) {

	156 return IntoTwoByte(k, is_uri, uri_length, &uri_content,

	157 two_byte_buffer);

	158 }

	159 one_byte_buffer->Add(code);

	160 }

	161 }

	162 return true;

	163 }

	164

	165 } // anonymous namespace

	166

	167 MaybeHandle<String> Uri::Decode(Isolate* isolate, Handle<String> uri,

	168 bool is_uri) {

	169 uri = String::Flatten(uri);

	170 List<uint8_t> one_byte_buffer;

	171 List<uc16> two_byte_buffer;

	172

	173 if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {

	174 THROW_NEW_ERROR(isolate, NewURIError(), String);

	175 }

	176

	177 if (two_byte_buffer.is_empty()) {

	178 return isolate->factory()->NewStringFromOneByte(

	179 one_byte_buffer.ToConstVector());

	180 }

	181

	182 Handle<SeqTwoByteString> result;

	183 ASSIGN_RETURN_ON_EXCEPTION(

	184 isolate, result, isolate->factory()->NewRawTwoByteString(

	185 one_byte_buffer.length() + two_byte_buffer.length()),

	186 String);

	187

	188 CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(),

	189 one_byte_buffer.length());

	190 CopyChars(result->GetChars() + one_byte_buffer.length(),

	191 two_byte_buffer.ToConstVector().start(), two_byte_buffer.length());

	192

	193 return result;

	194 }

	195

15 namespace { // anonymous namespace for EncodeURI helper functions	196 namespace { // anonymous namespace for EncodeURI helper functions

16 bool IsUnescapePredicateInUriComponent(uc16 c) {	197 bool IsUnescapePredicateInUriComponent(uc16 c) {

17 if (IsAlphaNumeric(c)) {	198 if (IsAlphaNumeric(c)) {

18 return true;	199 return true;

19 }	200 }

20	201

21 switch (c) {	202 switch (c) {

22 case '!':	203 case '!':

23 case '\'':	204 case '\'':

24 case '(':	205 case '(':

(...skipping 28 matching lines...) Expand all Loading...
53 }	234 }

54 }	235 }

55	236

56 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {	237 void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {

57 buffer->Add('%');	238 buffer->Add('%');

58 buffer->Add(HexCharOfValue(octet >> 4));	239 buffer->Add(HexCharOfValue(octet >> 4));

59 buffer->Add(HexCharOfValue(octet & 0x0F));	240 buffer->Add(HexCharOfValue(octet & 0x0F));

60 }	241 }

61	242

62 void EncodeSingle(uc16 c, List<uint8_t>* buffer) {	243 void EncodeSingle(uc16 c, List<uint8_t>* buffer) {

63 uint8_t x = (c >> 12) & 0xF;	244 char s[4];

64 uint8_t y = (c >> 6) & 63;	245 int number_of_bytes;

65 uint8_t z = c & 63;	246 number_of_bytes =

66 if (c <= 0x007F) {	247 unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);

67 AddHexEncodedToBuffer(c, buffer);	248 for (int k = 0; k < number_of_bytes; k++) {

68 } else if (c <= 0x07FF) {	249 AddHexEncodedToBuffer(s[k], buffer);

69 AddHexEncodedToBuffer(y + 192, buffer);

70 AddHexEncodedToBuffer(z + 128, buffer);

71 } else {

72 AddHexEncodedToBuffer(x + 224, buffer);

73 AddHexEncodedToBuffer(y + 128, buffer);

74 AddHexEncodedToBuffer(z + 128, buffer);

75 }	250 }

76 }	251 }

77	252

78 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {	253 void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {

79 uint8_t u = ((cc1 >> 6) & 0xF) + 1;	254 char s[4];

80 uint8_t w = (cc1 >> 2) & 0xF;	255 int number_of_bytes =

81 uint8_t x = cc1 & 3;	256 unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),

82 uint8_t y = (cc2 >> 6) & 0xF;	257 unibrow::Utf16::kNoPreviousCharacter, false);

83 uint8_t z = cc2 & 63;	258 for (int k = 0; k < number_of_bytes; k++) {

84 AddHexEncodedToBuffer((u >> 2) + 240, buffer);	259 AddHexEncodedToBuffer(s[k], buffer);

85 AddHexEncodedToBuffer((((u & 3) << 4) \| w) + 128, buffer);	260 }

86 AddHexEncodedToBuffer(((x << 4) \| y) + 128, buffer);

87 AddHexEncodedToBuffer(z + 128, buffer);

88 }	261 }

89	262

90 } // anonymous namespace	263 } // anonymous namespace

91	264

92 MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri,	265 MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri,

93 bool is_uri) {	266 bool is_uri) {

94 uri = String::Flatten(uri);	267 uri = String::Flatten(uri);

95 int uri_length = uri->length();	268 int uri_length = uri->length();

96 List<uint8_t> buffer(uri_length);	269 List<uint8_t> buffer(uri_length);

97	270

(...skipping 25 matching lines...) Expand all Loading...
123 AllowHeapAllocation allocate_error_and_return;	296 AllowHeapAllocation allocate_error_and_return;

124 THROW_NEW_ERROR(isolate, NewURIError(), String);	297 THROW_NEW_ERROR(isolate, NewURIError(), String);

125 }	298 }

126 }	299 }

127	300

128 return isolate->factory()->NewStringFromOneByte(buffer.ToConstVector());	301 return isolate->factory()->NewStringFromOneByte(buffer.ToConstVector());

129 }	302 }

130	303

131 } // namespace internal	304 } // namespace internal

132 } // namespace v8	305 } // namespace v8

OLD	NEW

« no previous file with comments | « src/uri.h ('k') | test/cctest/compiler/test-run-intrinsics.cc » ('j') | no next file with comments »