Chromium Code Reviews| Index: src/uri.cc |
| diff --git a/src/uri.cc b/src/uri.cc |
| index c459be5e53f4ad915d669a0122cec77b1d1889e2..0c0ef9c0cb1b214d5a92cabebe3bfc4cee7f99d8 100644 |
| --- a/src/uri.cc |
| +++ b/src/uri.cc |
| @@ -60,36 +60,29 @@ void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { |
| } |
| void EncodeSingle(uc16 c, List<uint8_t>* buffer) { |
| - uint8_t x = (c >> 12) & 0xF; |
| - uint8_t y = (c >> 6) & 63; |
| - uint8_t z = c & 63; |
| - if (c <= 0x007F) { |
| - AddHexEncodedToBuffer(c, buffer); |
| - } else if (c <= 0x07FF) { |
| - AddHexEncodedToBuffer(y + 192, buffer); |
| - AddHexEncodedToBuffer(z + 128, buffer); |
| - } else { |
| - AddHexEncodedToBuffer(x + 224, buffer); |
| - AddHexEncodedToBuffer(y + 128, buffer); |
| - AddHexEncodedToBuffer(z + 128, buffer); |
| + char s[4]; |
| + int number_of_bytes; |
| + number_of_bytes = |
| + unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false); |
| + for (int k = 0; k < number_of_bytes; k++) { |
| + AddHexEncodedToBuffer(s[k], buffer); |
| } |
| } |
| void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { |
| - uint8_t u = ((cc1 >> 6) & 0xF) + 1; |
| - uint8_t w = (cc1 >> 2) & 0xF; |
| - uint8_t x = cc1 & 3; |
| - uint8_t y = (cc2 >> 6) & 0xF; |
| - uint8_t z = cc2 & 63; |
| - AddHexEncodedToBuffer((u >> 2) + 240, buffer); |
| - AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); |
| - AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer); |
| - AddHexEncodedToBuffer(z + 128, buffer); |
| + char s[4]; |
| + int number_of_bytes = |
| + unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2), |
| + unibrow::Utf16::kNoPreviousCharacter, false); |
| + for (int k = 0; k < number_of_bytes; k++) { |
| + AddHexEncodedToBuffer(s[k], buffer); |
| + } |
| } |
| } // anonymous namespace |
| -Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) { |
| +MaybeHandle<Object> Uri::Encode(Isolate* isolate, Handle<String> uri, |
| + bool is_uri) { |
| uri = String::Flatten(uri); |
| int uri_length = uri->length(); |
| List<uint8_t> buffer(uri_length); |
| @@ -120,15 +113,189 @@ Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) { |
| } |
| AllowHeapAllocation allocate_error_and_return; |
| - THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError()); |
| + THROW_NEW_ERROR(isolate, NewURIError(), Object); |
| } |
| } |
| Handle<String> result; |
| - ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| + ASSIGN_RETURN_ON_EXCEPTION( |
| isolate, result, |
| - isolate->factory()->NewStringFromOneByte(buffer.ToConstVector())); |
| - return *result; |
| + isolate->factory()->NewStringFromOneByte(buffer.ToConstVector()), Object); |
| + return result; |
| +} |
| + |
| +namespace { // anonymous namespace for DecodeURI helper functions |
| + |
| +bool IsReservedPredicate(uc16 c) { |
| + switch (c) { |
| + case '#': |
| + case '$': |
| + case '&': |
| + case '+': |
| + case ',': |
| + case '/': |
| + case ':': |
| + case ';': |
| + case '=': |
| + case '?': |
| + case '@': |
| + return true; |
| + default: |
| + return false; |
| + } |
| +} |
| + |
| +bool IsRepalcementCharacter(List<uint8_t>* octets) { |
|
Yang
2016/05/23 06:44:32
typo.
Franzi
2016/05/23 08:55:57
Done.
|
| + // 0xFFFD is %ef%bf%bd |
|
Yang
2016/05/23 06:44:32
What does this comment mean?
Franzi
2016/05/23 08:55:57
Reworded the comment to clarify why we check for t
|
| + if (octets->length() != 3 || octets->at(0) != 0xef || octets->at(1) != 0xbf || |
| + octets->at(2) != 0xbd) { |
| + return false; |
| + } |
| + return true; |
| +} |
| + |
| +bool DecodeOctets(List<uint8_t>* octets, List<uc16>* two_byte_buffer) { |
| + size_t cursor = 0; |
| + uc32 value = unibrow::Utf8::ValueOf(octets->ToConstVector().start(), |
| + octets->length(), &cursor); |
| + // kBadChar is the Replacement Character, which is the decoding of |
| + // valid input %ef%bf%bd |
| + if (value == unibrow::Utf8::kBadChar && !IsRepalcementCharacter(octets)) { |
| + return false; |
| + } |
| + |
| + if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| + two_byte_buffer->Add(value); |
| + } else { |
| + two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value)); |
| + two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value)); |
| + } |
| + return true; |
| +} |
| + |
| +bool TwoDigitHex(uc16& decoded, int k, String::FlatContent* uri_content) { |
|
Yang
2016/05/23 06:44:31
Can we use uc16* as argument type? That way it's e
Yang
2016/05/23 06:44:32
can we call the second argument "index" or something…
Franzi
2016/05/23 08:55:57
Done.
Franzi
2016/05/23 08:55:57
Done.
|
| + char high = HexValue(uri_content->Get(k + 1)); |
|
Yang
2016/05/23 06:44:32
FlatContent::Get returns a uc16. Casting that to s
Franzi
2016/05/23 08:55:57
Not sure I understand the comment. HexValue takes
Yang
2016/05/23 11:24:59
Ah I see. I misunderstood. Never mind this comment.
|
| + char low = HexValue(uri_content->Get(k + 2)); |
| + if (high < 0 || low < 0) { |
| + return false; |
| + } |
| + decoded = (high << 4) | low; |
| + return true; |
| +} |
| + |
| +template <typename T> |
| +void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int k, |
|
Yang
2016/05/23 06:44:32
same here, "index" instead of "k".
Franzi
2016/05/23 08:55:57
Done.
|
| + bool is_uri, List<T>* buffer) { |
| + if (is_uri && IsReservedPredicate(decoded)) { |
| + buffer->Add('%'); |
| + buffer->Add(uri_content->Get(k + 1)); |
| + buffer->Add(uri_content->Get(k + 2)); |
|
Yang
2016/05/23 06:44:31
Can we have a safeguard here that we don't have im
Franzi
2016/05/23 08:55:57
Done. Throwing exception if uri_content->Get() is
|
| + } else { |
| + buffer->Add(decoded); |
| + } |
| +} |
| + |
| +bool IntoTwoByte(int index, bool is_uri, int uri_length, |
| + String::FlatContent* uri_content, |
| + List<uc16>* two_byte_buffer) { |
| + for (int k = index; k < uri_length; k++) { |
| + uc16 code = uri_content->Get(k); |
| + if (code == '%') { |
| + uc16 decoded; |
| + if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, uri_content)) { |
| + return false; |
| + } |
| + k += 2; |
| + if (decoded > unibrow::Utf8::kMaxOneByteChar) { |
| + int n = 0; |
| + while (((decoded << ++n) & 0x80) != 0) { |
|
Yang
2016/05/23 06:44:31
Can we have this as
do {
n++;
} while ((decode
Franzi
2016/05/23 08:55:57
Changed it to a simple while loop:
int n = 1;
|
| + } |
| + if (n == 1 || n > 4 || k + 3 * (n - 1) >= uri_length) { |
| + return false; |
| + } |
| + List<uint8_t> octets; |
|
Yang
2016/05/23 06:44:31
octets will at most have the length 4, right? Can
Franzi
2016/05/23 08:55:57
Done.
|
| + octets.Add(decoded); |
| + |
| + for (int i = 1; i < n; i++) { |
| + uc16 decodedTrail; |
| + |
| + if (uri_content->Get(++k) != '%' || k + 2 >= uri_length || |
| + !TwoDigitHex(decodedTrail, k, uri_content)) { |
| + return false; |
| + } |
| + k += 2; |
| + octets.Add(decodedTrail); |
| + } |
| + |
| + if (!DecodeOctets(&octets, two_byte_buffer)) { |
| + return false; |
| + } |
| + } else { |
| + AddToBuffer(decoded, uri_content, k - 2, is_uri, two_byte_buffer); |
| + } |
| + } else { |
| + two_byte_buffer->Add(code); |
| + } |
| + } |
| + return true; |
| +} |
| + |
| +bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri, |
| + List<uint8_t>* one_byte_buffer, |
| + List<uc16>* two_byte_buffer) { |
| + DisallowHeapAllocation no_gc; |
| + String::FlatContent uri_content = uri->GetFlatContent(); |
| + |
| + int uri_length = uri->length(); |
| + for (int k = 0; k < uri_length; k++) { |
| + uc16 code = uri_content.Get(k); |
| + if (code == '%') { |
| + uc16 decoded; |
| + if (k + 2 >= uri_length || !TwoDigitHex(decoded, k, &uri_content)) { |
| + return false; |
| + } |
| + |
| + if (decoded > unibrow::Utf8::kMaxOneByteChar) { |
| + return IntoTwoByte(k, is_uri, uri_length, &uri_content, |
| + two_byte_buffer); |
| + } |
| + |
| + AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer); |
| + k += 2; |
| + } else { |
| + if (code > unibrow::Utf8::kMaxOneByteChar) { |
| + return IntoTwoByte(k, is_uri, uri_length, &uri_content, |
| + two_byte_buffer); |
| + } |
| + one_byte_buffer->Add(code); |
| + } |
| + } |
| + return true; |
| +} |
| + |
| +} // anonymous namespace |
| + |
| +MaybeHandle<Object> Uri::Decode(Isolate* isolate, Handle<String> uri, |
| + bool is_uri) { |
| + uri = String::Flatten(uri); |
| + List<uint8_t> one_byte_buffer; |
| + List<uc16> two_byte_buffer; |
| + |
| + if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) { |
| + THROW_NEW_ERROR(isolate, NewURIError(), Object); |
| + } |
| + |
| + Handle<String> left = isolate->factory()->InternalizeOneByteString( |
| + one_byte_buffer.ToConstVector()); |
| + |
| + Handle<String> right = isolate->factory()->InternalizeTwoByteString( |
| + two_byte_buffer.ToConstVector()); |
| + |
| + Handle<String> result; |
| + ASSIGN_RETURN_ON_EXCEPTION( |
| + isolate, result, isolate->factory()->NewConsString(left, right), Object); |
|
Yang
2016/05/23 06:44:32
Since we are going to copy from list into the heap
Franzi
2016/05/23 08:55:57
Returning sequential one- or two-byte string.
|
| + |
| + return result; |
| } |
| } // namespace internal |