| Index: src/uri.cc
|
| diff --git a/src/uri.cc b/src/uri.cc
|
| index c459be5e53f4ad915d669a0122cec77b1d1889e2..152327ea6d1584f1d0c208fce9c04c52ee4dd77c 100644
|
| --- a/src/uri.cc
|
| +++ b/src/uri.cc
|
| @@ -60,31 +60,23 @@ void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {
|
| }
|
|
|
| void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
|
| - uint8_t x = (c >> 12) & 0xF;
|
| - uint8_t y = (c >> 6) & 63;
|
| - uint8_t z = c & 63;
|
| - if (c <= 0x007F) {
|
| - AddHexEncodedToBuffer(c, buffer);
|
| - } else if (c <= 0x07FF) {
|
| - AddHexEncodedToBuffer(y + 192, buffer);
|
| - AddHexEncodedToBuffer(z + 128, buffer);
|
| - } else {
|
| - AddHexEncodedToBuffer(x + 224, buffer);
|
| - AddHexEncodedToBuffer(y + 128, buffer);
|
| - AddHexEncodedToBuffer(z + 128, buffer);
|
| + char s[4];  // Scratch array handed to Utf8::Encode and read back below.
|
| + int number_of_bytes;  // Set from Utf8::Encode's return value; bounds the loop.
|
| + number_of_bytes =
|
| + unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
|
| + for (int k = 0; k < number_of_bytes; k++) {
|
| + AddHexEncodedToBuffer(s[k], buffer);  // Forward each byte of |s|, in order.
|
| }
|
| }
|
|
|
| void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
|
| - uint8_t u = ((cc1 >> 6) & 0xF) + 1;
|
| - uint8_t w = (cc1 >> 2) & 0xF;
|
| - uint8_t x = cc1 & 3;
|
| - uint8_t y = (cc2 >> 6) & 0xF;
|
| - uint8_t z = cc2 & 63;
|
| - AddHexEncodedToBuffer((u >> 2) + 240, buffer);
|
| - AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer);
|
| - AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer);
|
| - AddHexEncodedToBuffer(z + 128, buffer);
|
| + char s[4];  // Scratch array handed to Utf8::Encode and read back below.
|
| + int number_of_bytes =
|
| + unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),  // cc1 and cc2 are merged into one value before encoding.
|
| + unibrow::Utf16::kNoPreviousCharacter, false);
|
| + for (int k = 0; k < number_of_bytes; k++) {
|
| + AddHexEncodedToBuffer(s[k], buffer);  // Forward each byte of |s|, in order.
|
| + }
|
| }
|
|
|
| } // anonymous namespace
|
| @@ -131,5 +123,178 @@ Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) {
|
| return *result;
|
| }
|
|
|
| +namespace { // anonymous namespace for DecodeURI helper functions
|
| +
|
| +// Returns true when |c| is one of the eleven characters
|
| +//   # $ & + , / : ; = ? @
|
| +// and false for every other code unit.
|
| +bool IsReservedPredicate(uc16 c) {
|
| +  static const char kReservedCharacters[] = "#$&+,/:;=?@";
|
| +  // Scan up to, but not including, the terminating NUL so that c == 0 is
|
| +  // never reported as reserved.
|
| +  for (const char* candidate = kReservedCharacters; *candidate != '\0';
|
| +       ++candidate) {
|
| +    if (c == *candidate) return true;
|
| +  }
|
| +  return false;
|
| +}
|
| +
|
| +// Returns true when |octets| holds exactly the three bytes 0xEF 0xBF 0xBD,
|
| +// i.e. the UTF-8 form of U+FFFD (written %ef%bf%bd in an escaped URI).
|
| +bool IsReplacementCharacter(List<uint8_t>* octets) {
|
| +  // The length test comes first so at() is only used on valid indices.
|
| +  return octets->length() == 3 && octets->at(0) == 0xef &&
|
| +         octets->at(1) == 0xbf && octets->at(2) == 0xbd;
|
| +}
|
| +
|
| +// Decodes the bytes in |octets| with unibrow::Utf8::ValueOf and appends the
|
| +// result to |two_byte_buffer|: one code unit when the value is at most
|
| +// Utf16::kMaxNonSurrogateCharCode, otherwise a lead/trail surrogate pair.
|
| +// Returns false, appending nothing, when ValueOf yields kBadChar for input
|
| +// that is not itself the encoded replacement character.
|
| +bool DecodeOctets(List<uint8_t>* octets, List<uc16>* two_byte_buffer) {
|
| +  size_t cursor = 0;
|
| +  uc32 value = unibrow::Utf8::ValueOf(octets->ToConstVector().start(),
|
| +                                      octets->length(), &cursor);
|
| +  // kBadChar is the Replacement Character, which is also the decoding of the
|
| +  // valid input %ef%bf%bd, so that one input must not be treated as an error.
|
| +  if (value == unibrow::Utf8::kBadChar && !IsReplacementCharacter(octets)) {
|
| +    return false;
|
| +  }
|
| +
|
| +  if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
| +    two_byte_buffer->Add(value);
|
| +  } else {
|
| +    two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value));
|
| +    two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value));
|
| +  }
|
| +  return true;
|
| +}
|
| +
|
| +// Interprets the two code units at positions |k| + 1 and |k| + 2 of
|
| +// |uri_content| as hexadecimal digits (position |k| is the caller's '%').
|
| +// On success stores (high << 4) | low in |decoded| and returns true. Returns
|
| +// false, leaving |decoded| untouched, when either digit is rejected by
|
| +// HexValue. The caller must ensure that |k| + 2 is a valid index.
|
| +bool TwoDigitHex(uc16& decoded, int k, String::FlatContent* uri_content) {
|
| +  // Hold the digit values in int, not char: whether plain char is signed is
|
| +  // implementation-defined, and where it is unsigned a negative "not a hex
|
| +  // digit" result from HexValue would wrap to a positive value, so the
|
| +  // check below could never fire and invalid escapes would be accepted.
|
| +  int high = HexValue(uri_content->Get(k + 1));
|
| +  int low = HexValue(uri_content->Get(k + 2));
|
| +  if (high < 0 || low < 0) {
|
| +    return false;
|
| +  }
|
| +  decoded = (high << 4) | low;
|
| +  return true;
|
| +}
|
| +
|
| +// Appends the result of one %XX escape to |buffer|. |k| is the index of the
|
| +// '%' in |uri_content| and |decoded| is the value the escape stands for.
|
| +// When |is_uri| is set and |decoded| is a reserved character, the escape is
|
| +// copied through verbatim ('%' plus the two original code units); otherwise
|
| +// |decoded| itself is appended.
|
| +template <typename T>
|
| +void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int k,
|
| +                 bool is_uri, List<T>* buffer) {
|
| +  const bool keep_escaped = is_uri && IsReservedPredicate(decoded);
|
| +  if (!keep_escaped) {
|
| +    buffer->Add(decoded);
|
| +    return;
|
| +  }
|
| +  // Re-emit the escape using the source's own digits.
|
| +  buffer->Add('%');
|
| +  buffer->Add(uri_content->Get(k + 1));
|
| +  buffer->Add(uri_content->Get(k + 2));
|
| +}
|
| +
|
| +// Decodes |uri_content| from position |index| up to |uri_length| and appends
|
| +// the decoded code units to |two_byte_buffer|.
|
| +//  - Code units other than '%' are copied unchanged.
|
| +//  - A %XX escape whose value is at most Utf8::kMaxOneByteChar goes through
|
| +//    AddToBuffer (which may keep it escaped when |is_uri| is set).
|
| +//  - A larger value starts a multi-byte sequence: the following %XX escapes
|
| +//    are collected and handed to DecodeOctets.
|
| +// Returns false as soon as an escape is truncated, contains a non-hex digit,
|
| +// announces an unsupported sequence length, or fails in DecodeOctets.
|
| +bool IntoTwoByte(int index, bool is_uri, int uri_length,
|
| +                 String::FlatContent* uri_content,
|
| +                 List<uc16>* two_byte_buffer) {
|
| +  int pos = index;
|
| +  while (pos < uri_length) {
|
| +    const uc16 unit = uri_content->Get(pos);
|
| +    if (unit != '%') {
|
| +      two_byte_buffer->Add(unit);
|
| +      pos++;
|
| +      continue;
|
| +    }
|
| +
|
| +    // |pos| is at a '%': both hex digits must exist and be valid.
|
| +    uc16 lead;
|
| +    if (pos + 2 >= uri_length || !TwoDigitHex(lead, pos, uri_content)) {
|
| +      return false;
|
| +    }
|
| +
|
| +    if (lead <= unibrow::Utf8::kMaxOneByteChar) {
|
| +      AddToBuffer(lead, uri_content, pos, is_uri, two_byte_buffer);
|
| +      pos += 3;  // Skip '%' and both digits.
|
| +      continue;
|
| +    }
|
| +
|
| +    // Count the run of set bits starting at bit 7 of the lead byte; that
|
| +    // run length is the number of bytes in the sequence.
|
| +    int sequence_length = 1;
|
| +    while (((lead << sequence_length) & 0x80) != 0) {
|
| +      sequence_length++;
|
| +    }
|
| +
|
| +    pos += 2;  // Now on the second hex digit of the lead escape.
|
| +    // A run of 1 or more than 4 is rejected, and each remaining byte needs
|
| +    // three more code units ("%XX") inside the input.
|
| +    if (sequence_length == 1 || sequence_length > 4 ||
|
| +        pos + 3 * (sequence_length - 1) >= uri_length) {
|
| +      return false;
|
| +    }
|
| +
|
| +    List<uint8_t> octets;
|
| +    octets.Add(lead);
|
| +    for (int remaining = sequence_length - 1; remaining > 0; remaining--) {
|
| +      uc16 trail;
|
| +      ++pos;  // Step onto what must be the next '%'.
|
| +      if (uri_content->Get(pos) != '%' || pos + 2 >= uri_length ||
|
| +          !TwoDigitHex(trail, pos, uri_content)) {
|
| +        return false;
|
| +      }
|
| +      pos += 2;
|
| +      octets.Add(trail);
|
| +    }
|
| +
|
| +    if (!DecodeOctets(&octets, two_byte_buffer)) {
|
| +      return false;
|
| +    }
|
| +    pos++;  // Move past the last hex digit of the sequence.
|
| +  }
|
| +  return true;
|
| +}
|
| +
|
| +// Decodes |uri| from its start. Output goes to |one_byte_buffer| for as long
|
| +// as every produced value is at most Utf8::kMaxOneByteChar. At the first
|
| +// code unit or %XX escape above that limit, the remainder of the input
|
| +// (starting at that position) is delegated to IntoTwoByte, which writes to
|
| +// |two_byte_buffer|. Returns false when an escape is truncated or contains a
|
| +// non-hex digit, or when IntoTwoByte fails.
|
| +bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
|
| +                       List<uint8_t>* one_byte_buffer,
|
| +                       List<uc16>* two_byte_buffer) {
|
| +  DisallowHeapAllocation no_gc;
|
| +  String::FlatContent uri_content = uri->GetFlatContent();
|
| +
|
| +  int uri_length = uri->length();
|
| +  int pos = 0;
|
| +  while (pos < uri_length) {
|
| +    const uc16 unit = uri_content.Get(pos);
|
| +
|
| +    if (unit != '%') {
|
| +      if (unit > unibrow::Utf8::kMaxOneByteChar) {
|
| +        return IntoTwoByte(pos, is_uri, uri_length, &uri_content,
|
| +                           two_byte_buffer);
|
| +      }
|
| +      one_byte_buffer->Add(unit);
|
| +      pos++;
|
| +      continue;
|
| +    }
|
| +
|
| +    // |pos| is at a '%': both hex digits must exist and be valid.
|
| +    uc16 decoded;
|
| +    if (pos + 2 >= uri_length || !TwoDigitHex(decoded, pos, &uri_content)) {
|
| +      return false;
|
| +    }
|
| +
|
| +    if (decoded > unibrow::Utf8::kMaxOneByteChar) {
|
| +      // Restart from the '%' itself so IntoTwoByte sees the whole escape.
|
| +      return IntoTwoByte(pos, is_uri, uri_length, &uri_content,
|
| +                         two_byte_buffer);
|
| +    }
|
| +
|
| +    AddToBuffer(decoded, &uri_content, pos, is_uri, one_byte_buffer);
|
| +    pos += 3;  // Skip '%' and both digits.
|
| +  }
|
| +  return true;
|
| +}
|
| +
|
| +} // anonymous namespace
|
| +
|
| +// Decodes the percent-escapes in |uri| and returns the resulting string.
|
| +// |is_uri| is forwarded to the helpers, where it makes escapes of reserved
|
| +// characters stay escaped. The input is flattened, decoded into a one-byte
|
| +// part followed by a two-byte part, and the two parts are joined with
|
| +// NewConsString. When decoding fails, the error created by NewURIError() is
|
| +// thrown through THROW_NEW_ERROR_RETURN_FAILURE.
|
| +Object* Uri::Decode(Isolate* isolate, Handle<String> uri, bool is_uri) {
|
| +  uri = String::Flatten(uri);
|
| +
|
| +  List<uint8_t> one_byte_part;
|
| +  List<uc16> two_byte_part;
|
| +  const bool decoded_ok =
|
| +      IntoOneAndTwoByte(uri, is_uri, &one_byte_part, &two_byte_part);
|
| +  if (!decoded_ok) {
|
| +    THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError());
|
| +  }
|
| +
|
| +  // Materialize both parts as strings, one-byte part first.
|
| +  Handle<String> head = isolate->factory()->InternalizeOneByteString(
|
| +      one_byte_part.ToConstVector());
|
| +  Handle<String> tail = isolate->factory()->InternalizeTwoByteString(
|
| +      two_byte_part.ToConstVector());
|
| +
|
| +  Handle<String> result;
|
| +  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
|
| +      isolate, result, isolate->factory()->NewConsString(head, tail));
|
| +  return *result;
|
| +}
|
| +
|
| } // namespace internal
|
| } // namespace v8
|
|
|