src/runtime/runtime-strings.cc - Issue 1968953002: [runtime] Implement encodeURI as single runtime function.

Unified Diff: src/runtime/runtime-strings.cc

Issue 1968953002: [runtime] Implement encodeURI as single runtime function. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: "Fix isalnum() bug" (created 4 years, 7 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/runtime/runtime-strings.cc

diff --git a/src/runtime/runtime-strings.cc b/src/runtime/runtime-strings.cc

index 71f27a03d6edaf487efc206f11a8fca9eb76959a..c35be0115e9c8827df4a4aa6dbc21950b57927ee 100644

--- a/src/runtime/runtime-strings.cc

+++ b/src/runtime/runtime-strings.cc

@@ -2,11 +2,11 @@

// Use of this source code is governed by a BSD-style license that can be

// found in the LICENSE file.

-#include "src/runtime/runtime-utils.h"

#include "src/arguments.h"

#include "src/regexp/jsregexp-inl.h"

+#include "src/runtime/runtime-utils.h"

#include "src/string-builder.h"

#include "src/string-search.h"

namespace v8 {

@@ -16,7 +16,7 @@ namespace internal {

// Perform string match of pattern on subject, starting at start index.

// Caller must ensure that 0 <= start_index <= sub->length(),

// and should check that pat->length() + start_index <= sub->length().

-int StringMatch(Isolate* isolate, Handle<String> sub, Handle<String> pat,

+int StringMatch(Isolate *isolate, Handle<String> sub, Handle<String> pat,

int start_index) {

DCHECK(0 <= start_index);

DCHECK(start_index <= sub->length());

@@ -57,15 +57,15 @@ int StringMatch(Isolate* isolate, Handle<String> sub, Handle<String> pat,

// This may return an empty MaybeHandle if an exception is thrown or

// we abort due to reaching the recursion limit.

MaybeHandle<String> StringReplaceOneCharWithString(

- Isolate* isolate, Handle<String> subject, Handle<String> search,

- Handle<String> replace, bool* found, int recursion_limit) {

+ Isolate *isolate, Handle<String> subject, Handle<String> search,

Yang 2016/05/12 07:39:21 V8's convention is to have the asterisk at the type…

Franzi 2016/05/13 09:42:02 Done.

+ Handle<String> replace, bool *found, int recursion_limit) {

StackLimitCheck stackLimitCheck(isolate);

if (stackLimitCheck.HasOverflowed() || (recursion_limit == 0)) {

return MaybeHandle<String>();

}

recursion_limit--;

if (subject->IsConsString()) {

- ConsString* cons = ConsString::cast(*subject);

+ ConsString *cons = ConsString::cast(*subject);

Handle<String> first = Handle<String>(cons->first());

Handle<String> second = Handle<String>(cons->second());

Handle<String> new_first;

@@ -145,7 +145,6 @@ RUNTIME_FUNCTION(Runtime_StringIndexOf) {

return Smi::FromInt(position);

}

template <typename schar, typename pchar>

static int StringMatchBackwards(Vector<const schar> subject,

Vector<const pchar> pattern, int idx) {

@@ -347,7 +346,7 @@ RUNTIME_FUNCTION(Runtime_StringMatch) {

ZoneList<int> offsets(8, zone_scope.zone());

while (true) {

- int32_t* match = global_cache.FetchNext();

+ int32_t *match = global_cache.FetchNext();

if (match == NULL) break;

offsets.Add(match[0], zone_scope.zone()); // start

offsets.Add(match[1], zone_scope.zone()); // end

@@ -454,7 +453,7 @@ RUNTIME_FUNCTION(Runtime_StringBuilderConcat) {

{

DisallowHeapAllocation no_gc;

- FixedArray* fixed_array = FixedArray::cast(array->elements());

+ FixedArray *fixed_array = FixedArray::cast(array->elements());

if (fixed_array->length() < array_length) {

array_length = fixed_array->length();

}

@@ -462,7 +461,7 @@ RUNTIME_FUNCTION(Runtime_StringBuilderConcat) {

if (array_length == 0) {

return isolate->heap()->empty_string();

} else if (array_length == 1) {

- Object* first = fixed_array->get(0);

+ Object *first = fixed_array->get(0);

if (first->IsString()) return first;

}

length = StringBuilderConcatLength(special_length, fixed_array,

@@ -513,7 +512,7 @@ RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {

if (array_length == 0) {

return isolate->heap()->empty_string();

} else if (array_length == 1) {

- Object* first = fixed_array->get(0);

+ Object *first = fixed_array->get(0);

RUNTIME_ASSERT(first->IsString());

return first;

}

@@ -527,9 +526,9 @@ RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {

}

int length = (array_length - 1) * separator_length;

for (int i = 0; i < array_length; i++) {

- Object* element_obj = fixed_array->get(i);

+ Object *element_obj = fixed_array->get(i);

RUNTIME_ASSERT(element_obj->IsString());

- String* element = String::cast(element_obj);

+ String *element = String::cast(element_obj);

int increment = element->length();

if (increment > String::kMaxLength - length) {

STATIC_ASSERT(String::kMaxLength < kMaxInt);

@@ -545,14 +544,14 @@ RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {

DisallowHeapAllocation no_gc;

- uc16* sink = answer->GetChars();

+ uc16 *sink = answer->GetChars();

#ifdef DEBUG

uc16* end = sink + length;

#endif

RUNTIME_ASSERT(fixed_array->get(0)->IsString());

- String* first = String::cast(fixed_array->get(0));

- String* separator_raw = *separator;

+ String *first = String::cast(fixed_array->get(0));

+ String *separator_raw = *separator;

int first_length = first->length();

String::WriteToFlat(first, sink, 0, first_length);

@@ -564,7 +563,7 @@ RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {

sink += separator_length;

RUNTIME_ASSERT(fixed_array->get(i)->IsString());

- String* element = String::cast(fixed_array->get(i));

+ String *element = String::cast(fixed_array->get(i));

int element_length = element->length();

DCHECK(sink + element_length <= end);

String::WriteToFlat(element, sink, 0, element_length);

@@ -578,15 +577,15 @@ RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {

}

template <typename sinkchar>

-static void WriteRepeatToFlat(String* src, Vector<sinkchar> buffer, int cursor,

+static void WriteRepeatToFlat(String *src, Vector<sinkchar> buffer, int cursor,

int repeat, int length) {

if (repeat == 0) return;

- sinkchar* start = &buffer[cursor];

+ sinkchar *start = &buffer[cursor];

String::WriteToFlat<sinkchar>(src, start, 0, length);

int done = 1;

- sinkchar* next = start + length;

+ sinkchar *next = start + length;

while (done < repeat) {

int block = Min(done, repeat - done);

@@ -598,10 +597,10 @@ static void WriteRepeatToFlat(String* src, Vector<sinkchar> buffer, int cursor,

}

template <typename Char>

-static void JoinSparseArrayWithSeparator(FixedArray* elements,

+static void JoinSparseArrayWithSeparator(FixedArray *elements,

int elements_length,

uint32_t array_length,

- String* separator,

+ String *separator,

Vector<Char> buffer) {

DisallowHeapAllocation no_gc;

int previous_separator_position = 0;

@@ -610,7 +609,7 @@ static void JoinSparseArrayWithSeparator(FixedArray* elements,

int cursor = 0;

for (int i = 0; i < elements_length; i += 2) {

int position = NumberToInt32(elements->get(i));

- String* string = String::cast(elements->get(i + 1));

+ String *string = String::cast(elements->get(i + 1));

int string_length = string->length();

if (string->length() > 0) {

int repeat = position - previous_separator_position;

@@ -654,11 +653,11 @@ RUNTIME_FUNCTION(Runtime_SparseJoinWithSeparator) {

CONVERT_NUMBER_CHECKED(int, elements_length, Int32, elements_array->length());

RUNTIME_ASSERT(elements_length <= elements_array->elements()->length());

RUNTIME_ASSERT((elements_length & 1) == 0); // Even length.

- FixedArray* elements = FixedArray::cast(elements_array->elements());

+ FixedArray *elements = FixedArray::cast(elements_array->elements());

{

DisallowHeapAllocation no_gc;

for (int i = 0; i < elements_length; i += 2) {

- String* string = String::cast(elements->get(i + 1));

+ String *string = String::cast(elements->get(i + 1));

int length = string->length();

if (is_one_byte && !string->IsOneByteRepresentation()) {

is_one_byte = false;

@@ -723,15 +722,15 @@ RUNTIME_FUNCTION(Runtime_SparseJoinWithSeparator) {

// one-char strings in the cache. Gives up on the first char that is

// not in the cache and fills the remainder with smi zeros. Returns

// the length of the successfully copied prefix.

-static int CopyCachedOneByteCharsToArray(Heap* heap, const uint8_t* chars,

- FixedArray* elements, int length) {

+static int CopyCachedOneByteCharsToArray(Heap *heap, const uint8_t *chars,

+ FixedArray *elements, int length) {

DisallowHeapAllocation no_gc;

- FixedArray* one_byte_cache = heap->single_character_string_cache();

- Object* undefined = heap->undefined_value();

+ FixedArray *one_byte_cache = heap->single_character_string_cache();

+ Object *undefined = heap->undefined_value();

int i;

WriteBarrierMode mode = elements->GetWriteBarrierMode(no_gc);

for (i = 0; i < length; ++i) {

- Object* value = one_byte_cache->get(chars[i]);

+ Object *value = one_byte_cache->get(chars[i]);

if (value == undefined) break;

elements->set(i, value, mode);

}

@@ -806,11 +805,10 @@ static inline bool ToUpperOverflows(uc32 character) {

return (character == yuml_code || character == micro_code);

}

template <class Converter>

-MUST_USE_RESULT static Object* ConvertCaseHelper(

- Isolate* isolate, String* string, SeqString* result, int result_length,

- unibrow::Mapping<Converter, 128>* mapping) {

+MUST_USE_RESULT static Object *ConvertCaseHelper(

+ Isolate *isolate, String *string, SeqString *result, int result_length,

+ unibrow::Mapping<Converter, 128> *mapping) {

DisallowHeapAllocation no_gc;

// We try this twice, once with the assumption that the result is no longer

// than the input and, if that assumption breaks, again with the exact

@@ -946,10 +944,9 @@ static bool CheckFastAsciiConvert(char* dst, const char* src, int length,

}

#endif

template <class Converter>

-static bool FastAsciiConvert(char* dst, const char* src, int length,

- bool* changed_out) {

+static bool FastAsciiConvert(char *dst, const char *src, int length,

+ bool *changed_out) {

#ifdef DEBUG

char* saved_dst = dst;

const char* saved_src = src;

@@ -963,7 +960,7 @@ static bool FastAsciiConvert(char* dst, const char* src, int length,

static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1;

bool changed = false;

uintptr_t or_acc = 0;

- const char* const limit = src + length;

+ const char *const limit = src + length;

// dst is newly allocated and always aligned.

DCHECK(IsAligned(reinterpret_cast<intptr_t>(dst), sizeof(uintptr_t)));

@@ -972,26 +969,26 @@ static bool FastAsciiConvert(char* dst, const char* src, int length,

// Process the prefix of the input that requires no conversion one aligned

// (machine) word at a time.

while (src <= limit - sizeof(uintptr_t)) {

- const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);

+ const uintptr_t w = *reinterpret_cast<const uintptr_t *>(src);

or_acc |= w;

if (AsciiRangeMask(w, lo, hi) != 0) {

changed = true;

break;

}

- *reinterpret_cast<uintptr_t*>(dst) = w;

+ *reinterpret_cast<uintptr_t *>(dst) = w;

src += sizeof(uintptr_t);

dst += sizeof(uintptr_t);

}

// Process the remainder of the input performing conversion when

// required one word at a time.

while (src <= limit - sizeof(uintptr_t)) {

- const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);

+ const uintptr_t w = *reinterpret_cast<const uintptr_t *>(src);

or_acc |= w;

uintptr_t m = AsciiRangeMask(w, lo, hi);

// The mask has high (7th) bit set in every byte that needs

// conversion and we know that the distance between cases is

// 1 << 5.

- *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);

+ *reinterpret_cast<uintptr_t *>(dst) = w ^ (m >> 2);

src += sizeof(uintptr_t);

dst += sizeof(uintptr_t);

}

@@ -1019,11 +1016,10 @@ static bool FastAsciiConvert(char* dst, const char* src, int length,

return true;

}

template <class Converter>

-MUST_USE_RESULT static Object* ConvertCase(

- Handle<String> s, Isolate* isolate,

- unibrow::Mapping<Converter, 128>* mapping) {

+MUST_USE_RESULT static Object *ConvertCase(

Yang 2016/05/12 07:39:21 You probably did some auto format on your IDE. Please…

Franzi 2016/05/13 09:42:03 Acknowledged.

+ Handle<String> s, Isolate *isolate,

+ unibrow::Mapping<Converter, 128> *mapping) {

s = String::Flatten(s);

int length = s->length();

// Assume that the string is not empty; we need this assumption later

@@ -1044,8 +1040,8 @@ MUST_USE_RESULT static Object* ConvertCase(

DCHECK(flat_content.IsFlat());

bool has_changed_character = false;

bool is_ascii = FastAsciiConvert<Converter>(

- reinterpret_cast<char*>(result->GetChars()),

- reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),

+ reinterpret_cast<char *>(result->GetChars()),

+ reinterpret_cast<const char *>(flat_content.ToOneByteVector().start()),

length, &has_changed_character);

// If not ASCII, we discard the result and take the 2 byte path.

if (is_ascii) return has_changed_character ? *result : *s;

@@ -1058,7 +1054,7 @@ MUST_USE_RESULT static Object* ConvertCase(

result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();

}

- Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);

+ Object *answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);

if (answer->IsException() || answer->IsString()) return answer;

DCHECK(answer->IsSmi());

@@ -1103,7 +1099,7 @@ RUNTIME_FUNCTION(Runtime_StringTrim) {

int length = string->length();

int left = 0;

- UnicodeCache* unicode_cache = isolate->unicode_cache();

+ UnicodeCache *unicode_cache = isolate->unicode_cache();

if (trimLeft) {

while (left < length &&

unicode_cache->IsWhiteSpaceOrLineTerminator(string->Get(left))) {

@@ -1151,6 +1147,140 @@ RUNTIME_FUNCTION(Runtime_NewString) {

return *result;

}

+bool unescapePredicateInComponent(uint16_t c) {

Yang 2016/05/12 07:39:21 CamelCase with capital first letter please. Also

Franzi 2016/05/13 09:42:03 Done.

+ // do not escape alphanumeric or !'()*-._~

+ if (c < 256 && isalnum(c)) {

Yang 2016/05/12 07:39:21 Let's use IsAlphaNumeric from char-predicates.h. T

Franzi 2016/05/13 09:42:03 Done.

+ return true;

+ }

+ if (c == '!' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '-' ||

+ c == '.' || c == '_' || c == '~') {

Yang 2016/05/12 07:39:21 A switch would be a lot nicer to read.

Franzi 2016/05/13 09:42:02 Done.

+ return true;

+ }

+ return false;

+bool uriSeparator(uint16_t c) {

+ // separators are #:;/?$&+,@=

+ if (c == '#' || c == ':' || c == ';' || c == '/' || c == '?' || c == '$' ||

Yang 2016/05/12 07:39:21 Same here, a switch would be nice.

Franzi 2016/05/13 09:42:02 Done.

+ c == '&' || c == '+' || c == ',' || c == '@' || c == '=') {

+ return true;

+ }

+ return false;

+bool unescape(uint16_t c, bool is_url) {

Yang 2016/05/12 07:39:21 This is only used once. Let's inline it at the call site…

Franzi 2016/05/13 09:42:03 Done.

+ if (unescapePredicateInComponent(c)) {

+ return true;

+ }

+ return is_url && uriSeparator(c);

+void uriAddEncodedOctetToBuffer(uint16_t octet, List<uint16_t> *result) {

+ // Do I need lazy initialization?

Yang 2016/05/12 07:39:21 No. Just declare them as static const. And using a

Franzi 2016/05/13 09:42:02 Done.

+ int hexCharCodeArray[16] = {48, 49, 50, 51, 52, 53, 54, 55, 56, 57, // 0..9

Yang 2016/05/12 07:39:21 We have a HexCharOfValue in bignum.cc. You could m

+ 65, 66, 67, 68, 69, 70}; // A..F

+ result->Add(37); // Char code of '%'.

Yang 2016/05/12 07:39:21 How about result->Add('%')

Franzi 2016/05/13 09:42:02 Done.

+ uint16_t firstHex = octet >> 4;

+ uint16_t secondHex = octet & 0x0F;

+ DCHECK(firstHex < 16);

+ DCHECK(secondHex < 16);

+ result->Add(hexCharCodeArray[firstHex]);

+ result->Add(hexCharCodeArray[secondHex]);

+void uriEncodeOctets(uint16_t *octets, List<uint16_t> *result) {

+ uriAddEncodedOctetToBuffer(octets[0], result);

+ if (octets[1]) uriAddEncodedOctetToBuffer(octets[1], result);

+ if (octets[2]) uriAddEncodedOctetToBuffer(octets[2], result);

+ if (octets[3]) uriAddEncodedOctetToBuffer(octets[3], result);

+void uriEncodeSingle(uint16_t cc, List<uint16_t> *result) {

+ // 16 bits total, cut it in 4,6, and 6 bits

Yang 2016/05/12 07:39:21 whitespace after comma.

Franzi 2016/05/13 09:42:02 Done.

+ uint16_t x = (cc >> 12) & 0xF;

Yang 2016/05/12 07:39:21 uint8_t should suffice.

Franzi 2016/05/13 09:42:02 Done.

+ uint16_t y = (cc >> 6) & 63;

+ uint16_t z = cc & 63; // get last 6 bits

+ uint16_t octets[4] = {0, 0, 0, 0}; // TODO(franzih) 3 is enough here

+ if (cc <= 0x007F) { // smaller than 8 bits, i.e., 128

+ octets[0] = cc; // ascii same as UTF-8

Yang 2016/05/12 07:39:21 Instead of collecting octets, let's just call uriAddEncodedOctetToBuffer…

Franzi 2016/05/13 09:42:03 Done.

+ } else if (cc <= 0x07FF) {

+ octets[0] = y + 192; // Leading byte: 110xxxxx

+ octets[1] = z + 128; // Continuation byte: 10xxxxxx

+ } else {

+ octets[0] = x + 224; // Leading byte: 1110xxxx

+ octets[1] = y + 128; // Continuation byte: 10xxxxxx

+ octets[2] = z + 128; // Continuation byte: 10xxxxxx

+ }

+ uriEncodeOctets(octets, result);

+void uriEncodePair(uint16_t cc1, uint16_t cc2, List<uint16_t> *result) {

+ uint16_t u = ((cc1 >> 6) & 0xF) + 1;

+ uint16_t w = (cc1 >> 2) & 0xF;

+ uint16_t x = cc1 & 3;

+ uint16_t y = (cc2 >> 6) & 0xF;

+ uint16_t z = cc2 & 63;

+ uint16_t octets[4] = {0, 0, 0, 0};

+ octets[0] = (u >> 2) + 240;

+ octets[1] = (((u & 3) << 4) | w) + 128;

+ octets[2] = ((x << 4) | y) + 128;

+ octets[3] = z + 128;

+ uriEncodeOctets(octets, result);

+RUNTIME_FUNCTION(Runtime_Encode) {

+ HandleScope scope(isolate);

+ DCHECK(args.length() == 2);

+ CONVERT_ARG_HANDLE_CHECKED(String, uri, 0);

+ CONVERT_BOOLEAN_ARG_CHECKED(is_uri, 1);

+ size_t uriLength = uri->length();

Yang 2016/05/12 07:39:20 Let's use int here.

Franzi 2016/05/13 09:42:03 Done.

+ List<uint16_t> buffer;

Yang 2016/05/12 07:39:21 The result can only be ASCII characters, so please

Franzi 2016/05/13 09:42:03 Done.

+ for (size_t k = 0; k < uriLength; k++) {

Yang 2016/05/12 07:39:21 And int here.

Franzi 2016/05/13 09:42:02 Done.

+ uint16_t cc1 = uri->Get(static_cast<int>(k));

Yang 2016/05/12 07:39:21 Calling Get is fairly inefficient. We should flatten…

Franzi 2016/05/13 09:42:02 Done. Using Get() on FlatContent.

+ if (unescape(cc1, is_uri)) {

+ DCHECK(cc1 <= String::kMaxOneByteCharCode);

+ buffer.Add(cc1);

+ } else {

+ if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) {

Yang 2016/05/12 07:39:21 please do not use magic numbers here. Use unibrow:

Franzi 2016/05/13 09:42:02 Done.

+ THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewMakeURIError());

+ }

+ if (cc1 < 0xD800 || cc1 > 0xDBFF) {

Yang 2016/05/12 07:39:21 !unibrow::Utf16::IsLeadSurrogate

Franzi 2016/05/13 09:42:02 Done.

+ uriEncodeSingle(cc1, &buffer);

+ } else {

+ k++;

+ if (k == uriLength) {

+ THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewMakeURIError());

+ }

+ uint16_t cc2 = uri->Get(static_cast<int>(k));

+ if (cc2 < 0xDC00 || cc2 > 0xDFFF) {

Yang 2016/05/12 07:39:21 Here as well.

Franzi 2016/05/13 09:42:03 Done.

+ THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewMakeURIError());

+ }

+ uriEncodePair(cc1, cc2, &buffer);

+ }

+ Handle<String> result;

+ int totalLength = buffer.length();

+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(

+ isolate, result, isolate->factory()->NewRawOneByteString(totalLength));

Yang 2016/05/12 07:39:21 NewStringFromAscii(buffer.ToConstVector()) would w

Franzi 2016/05/13 09:42:03 Done. I'm using NewStringFromOneByte() because buf

+ for (int i = 0; i < totalLength; i++) {

+ uint16_t value = buffer.at(i);

+ result->Set(i, value);

+ }

+ return *result;

RUNTIME_FUNCTION(Runtime_StringLessThan) {

HandleScope handle_scope(isolate);

DCHECK_EQ(2, args.length());

@@ -1265,7 +1395,7 @@ RUNTIME_FUNCTION(Runtime_StringCharAt) {

if (!args[0]->IsString()) return Smi::FromInt(0);

if (!args[1]->IsNumber()) return Smi::FromInt(0);

if (std::isinf(args.number_at(1))) return isolate->heap()->empty_string();

- Object* code = __RT_impl_Runtime_StringCharCodeAtRT(args, isolate);

+ Object *code = __RT_impl_Runtime_StringCharCodeAtRT(args, isolate);

if (code->IsNaN()) return isolate->heap()->empty_string();

return __RT_impl_Runtime_StringCharFromCode(Arguments(1, &code), isolate);

}

« src/runtime/runtime.h ('K') | « src/runtime/runtime.h ('k') | test/mjsunit/messages.js » ('j') | test/mjsunit/messages.js » ('J')