src/uri.cc - Issue 1994733003: Rewrite decodeURI as builtin function, remove now unused runtime functions.

Unified Diff: src/uri.cc

Issue 1994733003: Rewrite decodeURI as builtin function, remove now unused runtime functions. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Address review comments Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/uri.cc

diff --git a/src/uri.cc b/src/uri.cc

index c459be5e53f4ad915d669a0122cec77b1d1889e2..0c0ef9c0cb1b214d5a92cabebe3bfc4cee7f99d8 100644

--- a/src/uri.cc

+++ b/src/uri.cc

@@ -60,36 +60,29 @@ void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {

}

void EncodeSingle(uc16 c, List<uint8_t>* buffer) {

- uint8_t x = (c >> 12) & 0xF;

- uint8_t y = (c >> 6) & 63;

- uint8_t z = c & 63;

- if (c <= 0x007F) {

- AddHexEncodedToBuffer(c, buffer);

- } else if (c <= 0x07FF) {

- AddHexEncodedToBuffer(y + 192, buffer);

- AddHexEncodedToBuffer(z + 128, buffer);

- } else {

- AddHexEncodedToBuffer(x + 224, buffer);

- AddHexEncodedToBuffer(y + 128, buffer);

- AddHexEncodedToBuffer(z + 128, buffer);

+ char s[4];

+ int number_of_bytes;

+ number_of_bytes =

+ unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);

+ for (int k = 0; k < number_of_bytes; k++) {

+ AddHexEncodedToBuffer(s[k], buffer);

}

void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {

- uint8_t u = ((cc1 >> 6) & 0xF) + 1;

- uint8_t w = (cc1 >> 2) & 0xF;

- uint8_t x = cc1 & 3;

- uint8_t y = (cc2 >> 6) & 0xF;

- uint8_t z = cc2 & 63;

- AddHexEncodedToBuffer((u >> 2) + 240, buffer);

- AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer);

- AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer);

- AddHexEncodedToBuffer(z + 128, buffer);

+ char s[4];

+ int number_of_bytes =

+ unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),

+ unibrow::Utf16::kNoPreviousCharacter, false);

+ for (int k = 0; k < number_of_bytes; k++) {

+ AddHexEncodedToBuffer(s[k], buffer);

+ }

}

} // anonymous namespace

-Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) {

+MaybeHandle<Object> Uri::Encode(Isolate* isolate, Handle<String> uri,

+ bool is_uri) {

uri = String::Flatten(uri);

int uri_length = uri->length();

List<uint8_t> buffer(uri_length);

@@ -120,15 +113,189 @@ Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) {

}

AllowHeapAllocation allocate_error_and_return;

- THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError());

+ THROW_NEW_ERROR(isolate, NewURIError(), Object);

}

Handle<String> result;

- ASSIGN_RETURN_FAILURE_ON_EXCEPTION(

+ ASSIGN_RETURN_ON_EXCEPTION(

isolate, result,

- isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()));

- return *result;

+ isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()), Object);

+ return result;

+namespace { // anonymous namespace for DecodeURI helper functions

+bool IsReservedPredicate(uc16 c) {

+ switch (c) {

+ case '#':

+ case '$':

+ case '&':

+ case '+':

+ case ',':

+ case '/':

+ case ':':

+ case ';':

+ case '=':

+ case '?':

+ case '@':

+ return true;

+ default:

+ return false;

+ }

+bool IsRepalcementCharacter(List<uint8_t>* octets) {

Yang 2016/05/23 06:44:32 typo.

Franzi 2016/05/23 08:55:57 Done.

+ // 0xFFFD is %ef%bf%bd

Yang 2016/05/23 06:44:32 What does this comment mean?

Franzi 2016/05/23 08:55:57 Reworded the comment to clarify why we check for t

+ if (octets->length() != 3 || octets->at(0) != 0xef || octets->at(1) != 0xbf ||

+ octets->at(2) != 0xbd) {

+ return false;

+ }

+ return true;

+bool DecodeOctets(List<uint8_t>* octets, List<uc16>* two_byte_buffer) {

+ size_t cursor = 0;

+ uc32 value = unibrow::Utf8::ValueOf(octets->ToConstVector().start(),

+ octets->length(), &cursor);

+ // kBadChar is the Replacement Character, which is the decoding of

+ // valid input %ef%bf%bd

+ if (value == unibrow::Utf8::kBadChar && !IsRepalcementCharacter(octets)) {

+ return false;

+ }

+ if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) {

+ two_byte_buffer->Add(value);

+ } else {

+ two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value));

+ two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value));

+ }

+ return true;

+bool TwoDigitHex(uc16& decoded, int k, String::FlatContent* uri_content) {

Yang 2016/05/23 06:44:31 Can we use uc16* as argument type? That way it's e

Yang 2016/05/23 06:44:32 can we call the second argument "index" or somethi

Franzi 2016/05/23 08:55:57 Done.

+ char high = HexValue(uri_content->Get(k + 1));

Yang 2016/05/23 06:44:32 FlatContent::Get returns a uc16. Casting that to s

Franzi 2016/05/23 08:55:57 Not sure I understand the comment. HexValue takes

Yang 2016/05/23 11:24:59 Ah, I see. I misunderstood. Never mind this comment.

+ char low = HexValue(uri_content->Get(k + 2));

+ if (high < 0 || low < 0) {

+ return false;

+ }

+ decoded = (high << 4) | low;

+ return true;

+template <typename T>

+void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int k,

Yang 2016/05/23 06:44:32 same here, "index" instead of "k".

Franzi 2016/05/23 08:55:57 Done.

+ bool is_uri, List<T>* buffer) {

+ if (is_uri && IsReservedPredicate(decoded)) {

+ buffer->Add('%');

+ buffer->Add(uri_content->Get(k + 1));

+ buffer->Add(uri_content->Get(k + 2));

Yang 2016/05/23 06:44:31 Can we have a safeguard here that we don't have im

Franzi 2016/05/23 08:55:57 Done. Throwing exception if uri_content->Get() is

+ } else {

+ buffer->Add(decoded);

+ }

+bool IntoTwoByte(int index, bool is_uri, int uri_length,

+ String::FlatContent* uri_content,

+ List<uc16>* two_byte_buffer) {

+ for (int k = index; k < uri_length; k++) {

+ uc16 code = uri_content->Get(k);

+ if (code == '%') {

+ uc16 decoded;

+ if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, uri_content)) {

+ return false;

+ }

+ k += 2;

+ if (decoded > unibrow::Utf8::kMaxOneByteChar) {

+ int n = 0;

+ while (((decoded << ++n) & 0x80) != 0) {

Yang 2016/05/23 06:44:31 Can we have this as do { n++; } while ((decode

Franzi 2016/05/23 08:55:57 Changed it to a simple while loop: int n = 1;

+ }

+ if (n == 1 || n > 4 || k + 3 * (n - 1) >= uri_length) {

+ return false;

+ }

+ List<uint8_t> octets;

Yang 2016/05/23 06:44:31 octets will at most have the length 4, right? Can

Franzi 2016/05/23 08:55:57 Done.

+ octets.Add(decoded);

+ for (int i = 1; i < n; i++) {

+ uc16 decodedTrail;

+ if (uri_content->Get(++k) != '%' || k + 2 >= uri_length ||

+ !TwoDigitHex(decodedTrail, k, uri_content)) {

+ return false;

+ }

+ k += 2;

+ octets.Add(decodedTrail);

+ }

+ if (!DecodeOctets(&octets, two_byte_buffer)) {

+ return false;

+ }

+ } else {

+ AddToBuffer(decoded, uri_content, k - 2, is_uri, two_byte_buffer);

+ }

+ } else {

+ two_byte_buffer->Add(code);

+ }

+ return true;

+bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,

+ List<uint8_t>* one_byte_buffer,

+ List<uc16>* two_byte_buffer) {

+ DisallowHeapAllocation no_gc;

+ String::FlatContent uri_content = uri->GetFlatContent();

+ int uri_length = uri->length();

+ for (int k = 0; k < uri_length; k++) {

+ uc16 code = uri_content.Get(k);

+ if (code == '%') {

+ uc16 decoded;

+ if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, &uri_content)) {

+ return false;

+ }

+ if (decoded > unibrow::Utf8::kMaxOneByteChar) {

+ return IntoTwoByte(k, is_uri, uri_length, &uri_content,

+ two_byte_buffer);

+ }

+ AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer);

+ k += 2;

+ } else {

+ if (code > unibrow::Utf8::kMaxOneByteChar) {

+ return IntoTwoByte(k, is_uri, uri_length, &uri_content,

+ two_byte_buffer);

+ }

+ one_byte_buffer->Add(code);

+ }

+ return true;

+} // anonymous namespace

+MaybeHandle<Object> Uri::Decode(Isolate* isolate, Handle<String> uri,

+ bool is_uri) {

+ uri = String::Flatten(uri);

+ List<uint8_t> one_byte_buffer;

+ List<uc16> two_byte_buffer;

+ if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {

+ THROW_NEW_ERROR(isolate, NewURIError(), Object);

+ }

+ Handle<String> left = isolate->factory()->InternalizeOneByteString(

+ one_byte_buffer.ToConstVector());

+ Handle<String> right = isolate->factory()->InternalizeTwoByteString(

+ two_byte_buffer.ToConstVector());

+ Handle<String> result;

+ ASSIGN_RETURN_ON_EXCEPTION(

+ isolate, result, isolate->factory()->NewConsString(left, right), Object);

Yang 2016/05/23 06:44:32 Since we are going to copy from list into the heap

Franzi 2016/05/23 08:55:57 Returning sequential one- or two-byte string.

+ return result;

}

} // namespace internal

« no previous file with comments | « src/uri.h ('k') | test/cctest/compiler/test-run-intrinsics.cc » ('j') | test/mjsunit/regress-612109.js » ('J')