| Index: src/uri.cc
|
| diff --git a/src/uri.cc b/src/uri.cc
|
| index 2736b2d74b744bf045cb33e30d6412c39b30e4d8..0107721888a809f624dbf5a3031f45fff3cdee6a 100644
|
| --- a/src/uri.cc
|
| +++ b/src/uri.cc
|
| @@ -8,6 +8,7 @@
|
| #include "src/handles.h"
|
| #include "src/isolate-inl.h"
|
| #include "src/list.h"
|
| +#include "src/string-search.h"
|
|
|
| namespace v8 {
|
| namespace internal {
|
| @@ -59,14 +60,14 @@ bool DecodeOctets(const uint8_t* octets, int length, List<uc16>* buffer) {
|
| return true;
|
| }
|
|
|
| -bool TwoDigitHex(int index, String::FlatContent* uri_content, uc16* decoded) {
|
| - char high = HexValue(uri_content->Get(index + 1));
|
| - char low = HexValue(uri_content->Get(index + 2));
|
| - if (high < 0 || low < 0) {
|
| - return false;
|
| - }
|
| - *decoded = (high << 4) | low;
|
| - return true;
|
| +int TwoDigitHex(uc16 character1, uc16 character2) {  // Combines two hex-digit code units into (high << 4) + low; returns -1 if either is rejected.
|
| + if (character1 > 'f') return -1;  // Anything above 'f' cannot be a hex digit.
|
| + int high = HexValue(character1);
|
| + if (high == -1) return -1;  // A -1 from HexValue is treated as "not a hex digit".
|
| + if (character2 > 'f') return -1;  // Same two checks for the low digit.
|
| + int low = HexValue(character2);
|
| + if (low == -1) return -1;
|
| + return (high << 4) + low;
|
| }
|
|
|
| template <typename T>
|
| @@ -92,7 +93,9 @@ bool IntoTwoByte(int index, bool is_uri, int uri_length,
|
| uc16 code = uri_content->Get(k);
|
| if (code == '%') {
|
| uc16 decoded;
|
| - if (k + 2 >= uri_length || !TwoDigitHex(k, uri_content, &decoded)) {
|
| + if (k + 2 >= uri_length ||
|
| + (decoded = TwoDigitHex(uri_content->Get(k + 1),
|
| + uri_content->Get(k + 2))) < 0) {
|
| return false;
|
| }
|
| k += 2;
|
| @@ -109,7 +112,8 @@ bool IntoTwoByte(int index, bool is_uri, int uri_length,
|
| uc16 continuation_byte;
|
|
|
| if (uri_content->Get(++k) != '%' ||
|
| - !TwoDigitHex(k, uri_content, &continuation_byte)) {
|
| + (continuation_byte = TwoDigitHex(uri_content->Get(k + 1),
|
| + uri_content->Get(k + 2))) < 0) {
|
| return false;
|
| }
|
| k += 2;
|
| @@ -140,7 +144,9 @@ bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
|
| uc16 code = uri_content.Get(k);
|
| if (code == '%') {
|
| uc16 decoded;
|
| - if (k + 2 >= uri_length || !TwoDigitHex(k, &uri_content, &decoded)) {
|
| + if (k + 2 >= uri_length ||
|
| + (decoded = TwoDigitHex(uri_content.Get(k + 1),
|
| + uri_content.Get(k + 2))) < 0) {
|
| return false;
|
| }
|
|
|
| @@ -234,29 +240,29 @@ bool IsUriSeparator(uc16 c) {
|
| }
|
| }
|
|
|
| -void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {
|
| +void AddEncodedOctetToBuffer(uint8_t octet, List<uint8_t>* buffer) {  // Appends '%' and then the hex characters of octet's high and low nibbles to buffer.
|
| buffer->Add('%');
|
| buffer->Add(HexCharOfValue(octet >> 4));
|
| buffer->Add(HexCharOfValue(octet & 0x0F));
|
| }
|
|
|
| void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
|
| - char s[4];
|
| + char s[4] = {};  // Zero-initialized scratch buffer that unibrow::Utf8::Encode fills for c.
|
| int number_of_bytes;
|
| number_of_bytes =
|
| unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
|
| for (int k = 0; k < number_of_bytes; k++) {
|
| - AddHexEncodedToBuffer(s[k], buffer);
|
| + AddEncodedOctetToBuffer(s[k], buffer);  // Percent-encode each of the number_of_bytes bytes reported by Encode.
|
| }
|
| }
|
|
|
| void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
|
| - char s[4];
|
| + char s[4] = {};  // Zero-initialized scratch buffer that unibrow::Utf8::Encode fills for the combined surrogate pair.
|
| int number_of_bytes =
|
| unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
|
| unibrow::Utf16::kNoPreviousCharacter, false);
|
| for (int k = 0; k < number_of_bytes; k++) {
|
| - AddHexEncodedToBuffer(s[k], buffer);
|
| + AddEncodedOctetToBuffer(s[k], buffer);  // Percent-encode each of the number_of_bytes bytes reported by Encode.
|
| }
|
| }
|
|
|
| @@ -301,5 +307,199 @@ MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri,
|
| return isolate->factory()->NewStringFromOneByte(buffer.ToConstVector());
|
| }
|
|
|
| +namespace { // Anonymous namespace for Escape and Unescape
|
| +
|
| +template <typename Char>  // Decodes whatever starts at vector[i]: a "%uXXXX" escape, a "%XX" escape, or a literal code unit.
|
| +int UnescapeChar(Vector<const Char> vector, int i, int length, int* step) {  // Returns the decoded value; *step receives the number of input units consumed (6, 3 or 1).
|
| + uint16_t character = vector[i];
|
| + int32_t hi = 0;
|
| + int32_t lo = 0;
|
| + if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&  // "%uXXXX": needs six units from i and four valid hex digits.
|
| + (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) > -1 &&
|
| + (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) > -1) {
|
| + *step = 6;
|
| + return (hi << 8) + lo;
|
| + } else if (character == '%' && i <= length - 3 &&  // "%XX": needs three units from i and two valid hex digits.
|
| + (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) > -1) {
|
| + *step = 3;
|
| + return lo;
|
| + } else {  // Not a well-formed escape here: pass the single unit through unchanged.
|
| + *step = 1;
|
| + return character;
|
| + }
|
| +}
|
| +
|
| +template <typename Char>  // Decodes string from start_index to its end and returns the untouched prefix joined with the decoded tail.
|
| +MaybeHandle<String> UnescapeSlow(Isolate* isolate, Handle<String> string,
|
| + int start_index) {
|
| + bool one_byte = true;
|
| + int length = string->length();
|
| +
|
| + int unescaped_length = 0;
|
| + {
|
| + DisallowHeapAllocation no_allocation;
|
| + Vector<const Char> vector = string->GetCharVector<Char>();
|
| + for (int i = start_index; i < length; unescaped_length++) {  // Pass 1: count decoded units and note whether any exceeds the one-byte range.
|
| + int step;
|
| + if (UnescapeChar(vector, i, length, &step) >
|
| + String::kMaxOneByteCharCode) {
|
| + one_byte = false;
|
| + }
|
| + i += step;  // step is set by UnescapeChar to the number of input units consumed.
|
| + }
|
| + }
|
| +
|
| + DCHECK(start_index < length);
|
| + Handle<String> first_part =
|
| + isolate->factory()->NewProperSubString(string, 0, start_index);  // The part before start_index is reused without decoding.
|
| +
|
| + int dest_position = 0;
|
| + Handle<String> second_part;
|
| + DCHECK(unescaped_length <= String::kMaxLength);
|
| + if (one_byte) {
|
| + Handle<SeqOneByteString> dest = isolate->factory()
|
| + ->NewRawOneByteString(unescaped_length)
|
| + .ToHandleChecked();
|
| + DisallowHeapAllocation no_allocation;
|
| + Vector<const Char> vector = string->GetCharVector<Char>();
|
| + for (int i = start_index; i < length; dest_position++) {  // Pass 2: decode again, storing each unit into the one-byte destination.
|
| + int step;
|
| + dest->SeqOneByteStringSet(dest_position,
|
| + UnescapeChar(vector, i, length, &step));
|
| + i += step;
|
| + }
|
| + second_part = dest;
|
| + } else {
|
| + Handle<SeqTwoByteString> dest = isolate->factory()
|
| + ->NewRawTwoByteString(unescaped_length)
|
| + .ToHandleChecked();
|
| + DisallowHeapAllocation no_allocation;
|
| + Vector<const Char> vector = string->GetCharVector<Char>();
|
| + for (int i = start_index; i < length; dest_position++) {  // Pass 2: same decoding, storing into the two-byte destination.
|
| + int step;
|
| + dest->SeqTwoByteStringSet(dest_position,
|
| + UnescapeChar(vector, i, length, &step));
|
| + i += step;
|
| + }
|
| + second_part = dest;
|
| + }
|
| + return isolate->factory()->NewConsString(first_part, second_part);  // Join the untouched prefix with the decoded tail.
|
| +}
|
| +
|
| +bool IsNotEscaped(uint16_t c) {  // True if c is alphanumeric or one of the listed punctuation characters; EscapePrivate copies such characters verbatim.
|
| + if (IsAlphaNumeric(c)) {
|
| + return true;
|
| + }
|
| + // Punctuation that is also left unescaped: @*_+-./
|
| + switch (c) {
|
| + case '@':
|
| + case '*':
|
| + case '_':
|
| + case '+':
|
| + case '-':
|
| + case '.':
|
| + case '/':
|
| + return true;
|
| + default:
|
| + return false;
|
| + }
|
| +}
|
| +
|
| +template <typename Char>  // Returns source itself when it contains no '%'; otherwise decodes from the first '%' via UnescapeSlow.
|
| +static MaybeHandle<String> UnescapePrivate(Isolate* isolate,
|
| + Handle<String> source) {
|
| + int index;
|
| + {
|
| + DisallowHeapAllocation no_allocation;
|
| + StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
|
| + index = search.Search(source->GetCharVector<Char>(), 0);  // Search for "%" starting at position 0.
|
| + if (index < 0) return source;  // Negative index: no '%' found, so there is nothing to unescape.
|
| + }
|
| + return UnescapeSlow<Char>(isolate, source, index);
|
| +}
|
| +
|
| +template <typename Char>  // Escapes a flat string: "%uXXXX" for units >= 256, verbatim for IsNotEscaped units, "%XX" for the rest.
|
| +static MaybeHandle<String> EscapePrivate(Isolate* isolate,
|
| + Handle<String> string) {
|
| + DCHECK(string->IsFlat());
|
| + int escaped_length = 0;
|
| + int length = string->length();
|
| +
|
| + {
|
| + DisallowHeapAllocation no_allocation;
|
| + Vector<const Char> vector = string->GetCharVector<Char>();
|
| + for (int i = 0; i < length; i++) {  // Pass 1: compute the length of the escaped result.
|
| + uint16_t c = vector[i];
|
| + if (c >= 256) {
|
| + escaped_length += 6;
|
| + } else if (IsNotEscaped(c)) {
|
| + escaped_length++;
|
| + } else {
|
| + escaped_length += 3;
|
| + }
|
| +
|
| + // We don't allow strings that are longer than a maximal length.
|
| + DCHECK(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
|
| + if (escaped_length > String::kMaxLength) break; // Provoke exception.
|
| + }
|
| + }
|
| +
|
| + // No length change implies no change. Return original string if no change.
|
| + if (escaped_length == length) return string;
|
| +
|
| + Handle<SeqOneByteString> dest;
|
| + ASSIGN_RETURN_ON_EXCEPTION(  // presumably returns early with the pending exception if the allocation fails — confirm against the macro definition.
|
| + isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
|
| + String);
|
| + int dest_position = 0;
|
| +
|
| + {
|
| + DisallowHeapAllocation no_allocation;
|
| + Vector<const Char> vector = string->GetCharVector<Char>();
|
| + for (int i = 0; i < length; i++) {  // Pass 2: write the escaped characters into dest.
|
| + uint16_t c = vector[i];
|
| + if (c >= 256) {  // Emit '%', 'u', then the four nibbles of c from most to least significant.
|
| + dest->SeqOneByteStringSet(dest_position, '%');
|
| + dest->SeqOneByteStringSet(dest_position + 1, 'u');
|
| + dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c >> 12));
|
| + dest->SeqOneByteStringSet(dest_position + 3,
|
| + HexCharOfValue((c >> 8) & 0xf));
|
| + dest->SeqOneByteStringSet(dest_position + 4,
|
| + HexCharOfValue((c >> 4) & 0xf));
|
| + dest->SeqOneByteStringSet(dest_position + 5, HexCharOfValue(c & 0xf));
|
| + dest_position += 6;
|
| + } else if (IsNotEscaped(c)) {
|
| + dest->SeqOneByteStringSet(dest_position, c);
|
| + dest_position++;
|
| + } else {  // Emit '%' followed by the two nibbles of c.
|
| + dest->SeqOneByteStringSet(dest_position, '%');
|
| + dest->SeqOneByteStringSet(dest_position + 1, HexCharOfValue(c >> 4));
|
| + dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c & 0xf));
|
| + dest_position += 3;
|
| + }
|
| + }
|
| + }
|
| +
|
| + return dest;
|
| +}
|
| +
|
| +} // Anonymous namespace
|
| +
|
| +MaybeHandle<String> Uri::Escape(Isolate* isolate, Handle<String> string) {  // Flattens string, then runs EscapePrivate instantiated for its character width.
|
| + Handle<String> result;  // NOTE(review): result is never used in this function.
|
| + string = String::Flatten(string);
|
| + return string->IsOneByteRepresentationUnderneath()
|
| + ? EscapePrivate<uint8_t>(isolate, string)
|
| + : EscapePrivate<uc16>(isolate, string);
|
| +}
|
| +
|
| +MaybeHandle<String> Uri::Unescape(Isolate* isolate, Handle<String> string) {  // Flattens string, then runs UnescapePrivate instantiated for its character width.
|
| + Handle<String> result;  // NOTE(review): result is never used in this function.
|
| + string = String::Flatten(string);
|
| + return string->IsOneByteRepresentationUnderneath()
|
| + ? UnescapePrivate<uint8_t>(isolate, string)
|
| + : UnescapePrivate<uc16>(isolate, string);
|
| +}
|
| +
|
| } // namespace internal
|
| } // namespace v8
|
|
|