Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1314)

Unified Diff: src/runtime/runtime-strings.cc

Issue 1968953002: [runtime] Implement encodeURI as single runtime function. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Fix isalnum() bug Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/runtime/runtime-strings.cc
diff --git a/src/runtime/runtime-strings.cc b/src/runtime/runtime-strings.cc
index 71f27a03d6edaf487efc206f11a8fca9eb76959a..c35be0115e9c8827df4a4aa6dbc21950b57927ee 100644
--- a/src/runtime/runtime-strings.cc
+++ b/src/runtime/runtime-strings.cc
@@ -2,11 +2,11 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "src/runtime/runtime-utils.h"
-
#include "src/arguments.h"
#include "src/regexp/jsregexp-inl.h"
+#include "src/runtime/runtime-utils.h"
#include "src/string-builder.h"
+
#include "src/string-search.h"
namespace v8 {
@@ -16,7 +16,7 @@ namespace internal {
// Perform string match of pattern on subject, starting at start index.
// Caller must ensure that 0 <= start_index <= sub->length(),
// and should check that pat->length() + start_index <= sub->length().
-int StringMatch(Isolate* isolate, Handle<String> sub, Handle<String> pat,
+int StringMatch(Isolate *isolate, Handle<String> sub, Handle<String> pat,
int start_index) {
DCHECK(0 <= start_index);
DCHECK(start_index <= sub->length());
@@ -57,15 +57,15 @@ int StringMatch(Isolate* isolate, Handle<String> sub, Handle<String> pat,
// This may return an empty MaybeHandle if an exception is thrown or
// we abort due to reaching the recursion limit.
MaybeHandle<String> StringReplaceOneCharWithString(
- Isolate* isolate, Handle<String> subject, Handle<String> search,
- Handle<String> replace, bool* found, int recursion_limit) {
+ Isolate *isolate, Handle<String> subject, Handle<String> search,
Yang 2016/05/12 07:39:21 V8's convention is to have the asterisk at the type […]
Franzi 2016/05/13 09:42:02 Done.
+ Handle<String> replace, bool *found, int recursion_limit) {
StackLimitCheck stackLimitCheck(isolate);
if (stackLimitCheck.HasOverflowed() || (recursion_limit == 0)) {
return MaybeHandle<String>();
}
recursion_limit--;
if (subject->IsConsString()) {
- ConsString* cons = ConsString::cast(*subject);
+ ConsString *cons = ConsString::cast(*subject);
Handle<String> first = Handle<String>(cons->first());
Handle<String> second = Handle<String>(cons->second());
Handle<String> new_first;
@@ -145,7 +145,6 @@ RUNTIME_FUNCTION(Runtime_StringIndexOf) {
return Smi::FromInt(position);
}
-
template <typename schar, typename pchar>
static int StringMatchBackwards(Vector<const schar> subject,
Vector<const pchar> pattern, int idx) {
@@ -347,7 +346,7 @@ RUNTIME_FUNCTION(Runtime_StringMatch) {
ZoneList<int> offsets(8, zone_scope.zone());
while (true) {
- int32_t* match = global_cache.FetchNext();
+ int32_t *match = global_cache.FetchNext();
if (match == NULL) break;
offsets.Add(match[0], zone_scope.zone()); // start
offsets.Add(match[1], zone_scope.zone()); // end
@@ -454,7 +453,7 @@ RUNTIME_FUNCTION(Runtime_StringBuilderConcat) {
{
DisallowHeapAllocation no_gc;
- FixedArray* fixed_array = FixedArray::cast(array->elements());
+ FixedArray *fixed_array = FixedArray::cast(array->elements());
if (fixed_array->length() < array_length) {
array_length = fixed_array->length();
}
@@ -462,7 +461,7 @@ RUNTIME_FUNCTION(Runtime_StringBuilderConcat) {
if (array_length == 0) {
return isolate->heap()->empty_string();
} else if (array_length == 1) {
- Object* first = fixed_array->get(0);
+ Object *first = fixed_array->get(0);
if (first->IsString()) return first;
}
length = StringBuilderConcatLength(special_length, fixed_array,
@@ -513,7 +512,7 @@ RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {
if (array_length == 0) {
return isolate->heap()->empty_string();
} else if (array_length == 1) {
- Object* first = fixed_array->get(0);
+ Object *first = fixed_array->get(0);
RUNTIME_ASSERT(first->IsString());
return first;
}
@@ -527,9 +526,9 @@ RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {
}
int length = (array_length - 1) * separator_length;
for (int i = 0; i < array_length; i++) {
- Object* element_obj = fixed_array->get(i);
+ Object *element_obj = fixed_array->get(i);
RUNTIME_ASSERT(element_obj->IsString());
- String* element = String::cast(element_obj);
+ String *element = String::cast(element_obj);
int increment = element->length();
if (increment > String::kMaxLength - length) {
STATIC_ASSERT(String::kMaxLength < kMaxInt);
@@ -545,14 +544,14 @@ RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {
DisallowHeapAllocation no_gc;
- uc16* sink = answer->GetChars();
+ uc16 *sink = answer->GetChars();
#ifdef DEBUG
uc16* end = sink + length;
#endif
RUNTIME_ASSERT(fixed_array->get(0)->IsString());
- String* first = String::cast(fixed_array->get(0));
- String* separator_raw = *separator;
+ String *first = String::cast(fixed_array->get(0));
+ String *separator_raw = *separator;
int first_length = first->length();
String::WriteToFlat(first, sink, 0, first_length);
@@ -564,7 +563,7 @@ RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {
sink += separator_length;
RUNTIME_ASSERT(fixed_array->get(i)->IsString());
- String* element = String::cast(fixed_array->get(i));
+ String *element = String::cast(fixed_array->get(i));
int element_length = element->length();
DCHECK(sink + element_length <= end);
String::WriteToFlat(element, sink, 0, element_length);
@@ -578,15 +577,15 @@ RUNTIME_FUNCTION(Runtime_StringBuilderJoin) {
}
template <typename sinkchar>
-static void WriteRepeatToFlat(String* src, Vector<sinkchar> buffer, int cursor,
+static void WriteRepeatToFlat(String *src, Vector<sinkchar> buffer, int cursor,
int repeat, int length) {
if (repeat == 0) return;
- sinkchar* start = &buffer[cursor];
+ sinkchar *start = &buffer[cursor];
String::WriteToFlat<sinkchar>(src, start, 0, length);
int done = 1;
- sinkchar* next = start + length;
+ sinkchar *next = start + length;
while (done < repeat) {
int block = Min(done, repeat - done);
@@ -598,10 +597,10 @@ static void WriteRepeatToFlat(String* src, Vector<sinkchar> buffer, int cursor,
}
template <typename Char>
-static void JoinSparseArrayWithSeparator(FixedArray* elements,
+static void JoinSparseArrayWithSeparator(FixedArray *elements,
int elements_length,
uint32_t array_length,
- String* separator,
+ String *separator,
Vector<Char> buffer) {
DisallowHeapAllocation no_gc;
int previous_separator_position = 0;
@@ -610,7 +609,7 @@ static void JoinSparseArrayWithSeparator(FixedArray* elements,
int cursor = 0;
for (int i = 0; i < elements_length; i += 2) {
int position = NumberToInt32(elements->get(i));
- String* string = String::cast(elements->get(i + 1));
+ String *string = String::cast(elements->get(i + 1));
int string_length = string->length();
if (string->length() > 0) {
int repeat = position - previous_separator_position;
@@ -654,11 +653,11 @@ RUNTIME_FUNCTION(Runtime_SparseJoinWithSeparator) {
CONVERT_NUMBER_CHECKED(int, elements_length, Int32, elements_array->length());
RUNTIME_ASSERT(elements_length <= elements_array->elements()->length());
RUNTIME_ASSERT((elements_length & 1) == 0); // Even length.
- FixedArray* elements = FixedArray::cast(elements_array->elements());
+ FixedArray *elements = FixedArray::cast(elements_array->elements());
{
DisallowHeapAllocation no_gc;
for (int i = 0; i < elements_length; i += 2) {
- String* string = String::cast(elements->get(i + 1));
+ String *string = String::cast(elements->get(i + 1));
int length = string->length();
if (is_one_byte && !string->IsOneByteRepresentation()) {
is_one_byte = false;
@@ -723,15 +722,15 @@ RUNTIME_FUNCTION(Runtime_SparseJoinWithSeparator) {
// one-char strings in the cache. Gives up on the first char that is
// not in the cache and fills the remainder with smi zeros. Returns
// the length of the successfully copied prefix.
-static int CopyCachedOneByteCharsToArray(Heap* heap, const uint8_t* chars,
- FixedArray* elements, int length) {
+static int CopyCachedOneByteCharsToArray(Heap *heap, const uint8_t *chars,
+ FixedArray *elements, int length) {
DisallowHeapAllocation no_gc;
- FixedArray* one_byte_cache = heap->single_character_string_cache();
- Object* undefined = heap->undefined_value();
+ FixedArray *one_byte_cache = heap->single_character_string_cache();
+ Object *undefined = heap->undefined_value();
int i;
WriteBarrierMode mode = elements->GetWriteBarrierMode(no_gc);
for (i = 0; i < length; ++i) {
- Object* value = one_byte_cache->get(chars[i]);
+ Object *value = one_byte_cache->get(chars[i]);
if (value == undefined) break;
elements->set(i, value, mode);
}
@@ -806,11 +805,10 @@ static inline bool ToUpperOverflows(uc32 character) {
return (character == yuml_code || character == micro_code);
}
-
template <class Converter>
-MUST_USE_RESULT static Object* ConvertCaseHelper(
- Isolate* isolate, String* string, SeqString* result, int result_length,
- unibrow::Mapping<Converter, 128>* mapping) {
+MUST_USE_RESULT static Object *ConvertCaseHelper(
+ Isolate *isolate, String *string, SeqString *result, int result_length,
+ unibrow::Mapping<Converter, 128> *mapping) {
DisallowHeapAllocation no_gc;
// We try this twice, once with the assumption that the result is no longer
// than the input and, if that assumption breaks, again with the exact
@@ -946,10 +944,9 @@ static bool CheckFastAsciiConvert(char* dst, const char* src, int length,
}
#endif
-
template <class Converter>
-static bool FastAsciiConvert(char* dst, const char* src, int length,
- bool* changed_out) {
+static bool FastAsciiConvert(char *dst, const char *src, int length,
+ bool *changed_out) {
#ifdef DEBUG
char* saved_dst = dst;
const char* saved_src = src;
@@ -963,7 +960,7 @@ static bool FastAsciiConvert(char* dst, const char* src, int length,
static const char hi = Converter::kIsToLower ? 'Z' + 1 : 'z' + 1;
bool changed = false;
uintptr_t or_acc = 0;
- const char* const limit = src + length;
+ const char *const limit = src + length;
// dst is newly allocated and always aligned.
DCHECK(IsAligned(reinterpret_cast<intptr_t>(dst), sizeof(uintptr_t)));
@@ -972,26 +969,26 @@ static bool FastAsciiConvert(char* dst, const char* src, int length,
// Process the prefix of the input that requires no conversion one aligned
// (machine) word at a time.
while (src <= limit - sizeof(uintptr_t)) {
- const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
+ const uintptr_t w = *reinterpret_cast<const uintptr_t *>(src);
or_acc |= w;
if (AsciiRangeMask(w, lo, hi) != 0) {
changed = true;
break;
}
- *reinterpret_cast<uintptr_t*>(dst) = w;
+ *reinterpret_cast<uintptr_t *>(dst) = w;
src += sizeof(uintptr_t);
dst += sizeof(uintptr_t);
}
// Process the remainder of the input performing conversion when
// required one word at a time.
while (src <= limit - sizeof(uintptr_t)) {
- const uintptr_t w = *reinterpret_cast<const uintptr_t*>(src);
+ const uintptr_t w = *reinterpret_cast<const uintptr_t *>(src);
or_acc |= w;
uintptr_t m = AsciiRangeMask(w, lo, hi);
// The mask has high (7th) bit set in every byte that needs
// conversion and we know that the distance between cases is
// 1 << 5.
- *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
+ *reinterpret_cast<uintptr_t *>(dst) = w ^ (m >> 2);
src += sizeof(uintptr_t);
dst += sizeof(uintptr_t);
}
@@ -1019,11 +1016,10 @@ static bool FastAsciiConvert(char* dst, const char* src, int length,
return true;
}
-
template <class Converter>
-MUST_USE_RESULT static Object* ConvertCase(
- Handle<String> s, Isolate* isolate,
- unibrow::Mapping<Converter, 128>* mapping) {
+MUST_USE_RESULT static Object *ConvertCase(
Yang 2016/05/12 07:39:21 You probably did some auto format on your IDE. Please […]
Franzi 2016/05/13 09:42:03 Acknowledged.
+ Handle<String> s, Isolate *isolate,
+ unibrow::Mapping<Converter, 128> *mapping) {
s = String::Flatten(s);
int length = s->length();
// Assume that the string is not empty; we need this assumption later
@@ -1044,8 +1040,8 @@ MUST_USE_RESULT static Object* ConvertCase(
DCHECK(flat_content.IsFlat());
bool has_changed_character = false;
bool is_ascii = FastAsciiConvert<Converter>(
- reinterpret_cast<char*>(result->GetChars()),
- reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
+ reinterpret_cast<char *>(result->GetChars()),
+ reinterpret_cast<const char *>(flat_content.ToOneByteVector().start()),
length, &has_changed_character);
// If not ASCII, we discard the result and take the 2 byte path.
if (is_ascii) return has_changed_character ? *result : *s;
@@ -1058,7 +1054,7 @@ MUST_USE_RESULT static Object* ConvertCase(
result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
}
- Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
+ Object *answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
if (answer->IsException() || answer->IsString()) return answer;
DCHECK(answer->IsSmi());
@@ -1103,7 +1099,7 @@ RUNTIME_FUNCTION(Runtime_StringTrim) {
int length = string->length();
int left = 0;
- UnicodeCache* unicode_cache = isolate->unicode_cache();
+ UnicodeCache *unicode_cache = isolate->unicode_cache();
if (trimLeft) {
while (left < length &&
unicode_cache->IsWhiteSpaceOrLineTerminator(string->Get(left))) {
@@ -1151,6 +1147,140 @@ RUNTIME_FUNCTION(Runtime_NewString) {
return *result;
}
+bool unescapePredicateInComponent(uint16_t c) {
Yang 2016/05/12 07:39:21 CamelCase with capital first letter please. Also
Franzi 2016/05/13 09:42:03 Done.
+ // do not escape alphanumeric or !'()*-._~
+ if (c < 256 && isalnum(c)) {
Yang 2016/05/12 07:39:21 Let's use IsAlphaNumeric from char-predicates.h. T
Franzi 2016/05/13 09:42:03 Done.
+ return true;
+ }
+ if (c == '!' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '-' ||
+ c == '.' || c == '_' || c == '~') {
Yang 2016/05/12 07:39:21 A switch would be a lot nicer to read.
Franzi 2016/05/13 09:42:02 Done.
+ return true;
+ }
+
+ return false;
+}
+
+bool uriSeparator(uint16_t c) {
+ // separators are #:;/?$&+,@=
+ if (c == '#' || c == ':' || c == ';' || c == '/' || c == '?' || c == '$' ||
Yang 2016/05/12 07:39:21 Same here, a switch would be nice.
Franzi 2016/05/13 09:42:02 Done.
+ c == '&' || c == '+' || c == ',' || c == '@' || c == '=') {
+ return true;
+ }
+ return false;
+}
+
+bool unescape(uint16_t c, bool is_url) {
Yang 2016/05/12 07:39:21 This is only used once. Let's inline it at the call site.
Franzi 2016/05/13 09:42:03 Done.
+ if (unescapePredicateInComponent(c)) {
+ return true;
+ }
+ return is_url && uriSeparator(c);
+}
+
+void uriAddEncodedOctetToBuffer(uint16_t octet, List<uint16_t> *result) {
+ // Do I need lazy initialization?
Yang 2016/05/12 07:39:21 No. Just declare them as static const. And using a
Franzi 2016/05/13 09:42:02 Done.
+ int hexCharCodeArray[16] = {48, 49, 50, 51, 52, 53, 54, 55, 56, 57, // 0..9
Yang 2016/05/12 07:39:21 We have a HexCharOfValue in bignum.cc. You could m
+ 65, 66, 67, 68, 69, 70}; // A..F
+
+ result->Add(37); // Char code of '%'.
Yang 2016/05/12 07:39:21 How about result->Add('%')
Franzi 2016/05/13 09:42:02 Done.
+ uint16_t firstHex = octet >> 4;
+ uint16_t secondHex = octet & 0x0F;
+
+ DCHECK(firstHex < 16);
+ DCHECK(secondHex < 16);
+
+ result->Add(hexCharCodeArray[firstHex]);
+ result->Add(hexCharCodeArray[secondHex]);
+}
+
+void uriEncodeOctets(uint16_t *octets, List<uint16_t> *result) {
+ uriAddEncodedOctetToBuffer(octets[0], result);
+ if (octets[1]) uriAddEncodedOctetToBuffer(octets[1], result);
+ if (octets[2]) uriAddEncodedOctetToBuffer(octets[2], result);
+ if (octets[3]) uriAddEncodedOctetToBuffer(octets[3], result);
+}
+
+void uriEncodeSingle(uint16_t cc, List<uint16_t> *result) {
+ // 16 bits total, cut it in 4,6, and 6 bits
Yang 2016/05/12 07:39:21 whitespace after comma.
Franzi 2016/05/13 09:42:02 Done.
+ uint16_t x = (cc >> 12) & 0xF;
Yang 2016/05/12 07:39:21 uint8_t should suffice.
Franzi 2016/05/13 09:42:02 Done.
+ uint16_t y = (cc >> 6) & 63;
+ uint16_t z = cc & 63; // get last 6 bits
+ uint16_t octets[4] = {0, 0, 0, 0}; // TODO(franzih) 3 is enough here
+ if (cc <= 0x007F) { // smaller than 8 bits, i.e., 128
+ octets[0] = cc; // ascii same as UTF-8
Yang 2016/05/12 07:39:21 Instead of collecting octets, let's just call uriAddEncodedOctetToBuffer […]
Franzi 2016/05/13 09:42:03 Done.
+ } else if (cc <= 0x07FF) {
+ octets[0] = y + 192; // Leading byte: 110xxxxx
+ octets[1] = z + 128; // Continuation byte: 10xxxxxx
+ } else {
+ octets[0] = x + 224; // Leading byte: 1110xxxx
+ octets[1] = y + 128; // Continuation byte: 10xxxxxx
+ octets[2] = z + 128; // Continuation byte: 10xxxxxx
+ }
+
+ uriEncodeOctets(octets, result);
+}
+
+void uriEncodePair(uint16_t cc1, uint16_t cc2, List<uint16_t> *result) {
+ uint16_t u = ((cc1 >> 6) & 0xF) + 1;
+ uint16_t w = (cc1 >> 2) & 0xF;
+ uint16_t x = cc1 & 3;
+ uint16_t y = (cc2 >> 6) & 0xF;
+ uint16_t z = cc2 & 63;
+ uint16_t octets[4] = {0, 0, 0, 0};
+ octets[0] = (u >> 2) + 240;
+ octets[1] = (((u & 3) << 4) | w) + 128;
+ octets[2] = ((x << 4) | y) + 128;
+ octets[3] = z + 128;
+ uriEncodeOctets(octets, result);
+}
+
+RUNTIME_FUNCTION(Runtime_Encode) {
+ HandleScope scope(isolate);
+ DCHECK(args.length() == 2);
+ CONVERT_ARG_HANDLE_CHECKED(String, uri, 0);
+ CONVERT_BOOLEAN_ARG_CHECKED(is_uri, 1);
+
+ size_t uriLength = uri->length();
Yang 2016/05/12 07:39:20 Let's use int here.
Franzi 2016/05/13 09:42:03 Done.
+ List<uint16_t> buffer;
Yang 2016/05/12 07:39:21 The result can only be ASCII characters, so please
Franzi 2016/05/13 09:42:03 Done.
+
+ for (size_t k = 0; k < uriLength; k++) {
Yang 2016/05/12 07:39:21 And int here.
Franzi 2016/05/13 09:42:02 Done.
+ uint16_t cc1 = uri->Get(static_cast<int>(k));
Yang 2016/05/12 07:39:21 Calling Get is fairly inefficient. We should flatten […]
Franzi 2016/05/13 09:42:02 Done. Using Get() on FlatContent.
+
+ if (unescape(cc1, is_uri)) {
+ DCHECK(cc1 <= String::kMaxOneByteCharCode);
+ buffer.Add(cc1);
+ } else {
+ if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) {
Yang 2016/05/12 07:39:21 please do not use magic numbers here. Use unibrow::Utf16::IsTrailSurrogate […]
Franzi 2016/05/13 09:42:02 Done.
+ THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewMakeURIError());
+ }
+ if (cc1 < 0xD800 || cc1 > 0xDBFF) {
Yang 2016/05/12 07:39:21 !unibrow::Utf16::IsLeadSurrogate
Franzi 2016/05/13 09:42:02 Done.
+ uriEncodeSingle(cc1, &buffer);
+ } else {
+ k++;
+ if (k == uriLength) {
+ THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewMakeURIError());
+ }
+ uint16_t cc2 = uri->Get(static_cast<int>(k));
+ if (cc2 < 0xDC00 || cc2 > 0xDFFF) {
Yang 2016/05/12 07:39:21 Here as well.
Franzi 2016/05/13 09:42:03 Done.
+ THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewMakeURIError());
+ }
+ uriEncodePair(cc1, cc2, &buffer);
+ }
+ }
+ }
+
+ Handle<String> result;
+ int totalLength = buffer.length();
+
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+ isolate, result, isolate->factory()->NewRawOneByteString(totalLength));
Yang 2016/05/12 07:39:21 NewStringFromAscii(buffer.ToConstVector()) would work […]
Franzi 2016/05/13 09:42:03 Done. I'm using NewStringFromOneByte() because buffer […]
+
+ for (int i = 0; i < totalLength; i++) {
+ uint16_t value = buffer.at(i);
+ result->Set(i, value);
+ }
+ return *result;
+}
+
RUNTIME_FUNCTION(Runtime_StringLessThan) {
HandleScope handle_scope(isolate);
DCHECK_EQ(2, args.length());
@@ -1265,7 +1395,7 @@ RUNTIME_FUNCTION(Runtime_StringCharAt) {
if (!args[0]->IsString()) return Smi::FromInt(0);
if (!args[1]->IsNumber()) return Smi::FromInt(0);
if (std::isinf(args.number_at(1))) return isolate->heap()->empty_string();
- Object* code = __RT_impl_Runtime_StringCharCodeAtRT(args, isolate);
+ Object *code = __RT_impl_Runtime_StringCharCodeAtRT(args, isolate);
if (code->IsNaN()) return isolate->heap()->empty_string();
return __RT_impl_Runtime_StringCharFromCode(Arguments(1, &code), isolate);
}

Powered by Google App Engine
This is Rietveld 408576698