Index: src/uri.cc |
diff --git a/src/uri.cc b/src/uri.cc |
index 13360fa0a1b56811301ccb873f8f28517c03be3b..2736b2d74b744bf045cb33e30d6412c39b30e4d8 100644 |
--- a/src/uri.cc |
+++ b/src/uri.cc |
@@ -12,6 +12,187 @@ |
namespace v8 { |
namespace internal { |
+namespace { // anonymous namespace for DecodeURI helper functions |
+// Returns true if |c| is one of: # $ & + , / : ; = ? @ |
+// AddToBuffer consults this predicate when is_uri is set and, for these |
+// characters, re-emits the original escape instead of the decoded character. |
+bool IsReservedPredicate(uc16 c) { |
+  return c == '#' || c == '$' || c == '&' || c == '+' || c == ',' || |
+         c == '/' || c == ':' || c == ';' || c == '=' || c == '?' || |
+         c == '@'; |
+} |
+ |
+// Returns true iff |octets| is exactly the three-byte UTF-8 encoding of the |
+// replacement character U+FFFD, i.e. 0xEF 0xBF 0xBD. |
+bool IsReplacementCharacter(const uint8_t* octets, int length) { |
+  // The length test comes first so that |octets| is only read when three |
+  // bytes are available. |
+  return length == 3 && octets[0] == 0xef && octets[1] == 0xbf && |
+         octets[2] == 0xbd; |
+} |
+ |
+// Decodes the |length| UTF-8 octets at |octets| with unibrow::Utf8::ValueOf |
+// and appends the result to |buffer|: a single code unit when the value is at |
+// most kMaxNonSurrogateCharCode, otherwise a lead/trail surrogate pair. |
+// Returns false if ValueOf yields kBadChar for octets that are not themselves |
+// the encoding of the replacement character. |
+bool DecodeOctets(const uint8_t* octets, int length, List<uc16>* buffer) { |
+  size_t consumed = 0; |
+  const uc32 code_point = unibrow::Utf8::ValueOf(octets, length, &consumed); |
+ |
+  // kBadChar is only acceptable when the input literally spells U+FFFD. |
+  const bool is_bad_char = code_point == unibrow::Utf8::kBadChar; |
+  if (is_bad_char && !IsReplacementCharacter(octets, length)) { |
+    return false; |
+  } |
+ |
+  if (code_point > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
+    buffer->Add(unibrow::Utf16::LeadSurrogate(code_point)); |
+    buffer->Add(unibrow::Utf16::TrailSurrogate(code_point)); |
+    return true; |
+  } |
+ |
+  buffer->Add(code_point); |
+  return true; |
+} |
+ |
+// Interprets the two characters after position |index| in |uri_content| as a |
+// pair of hexadecimal digits and stores the resulting octet in |*decoded|. |
+// Returns false if HexValue reports either character as invalid (negative |
+// result). Callers check that index + 2 is inside the string before calling. |
+bool TwoDigitHex(int index, String::FlatContent* uri_content, uc16* decoded) { |
+  // Hold the digit values in an int. Plain char is unsigned on some targets |
+  // (e.g. ARM), where a negative "not a hex digit" result stored in a char |
+  // could never compare below zero and invalid escapes would be accepted. |
+  const int high = HexValue(uri_content->Get(index + 1)); |
+  const int low = HexValue(uri_content->Get(index + 2)); |
+  if (high < 0 || low < 0) { |
+    return false; |
+  } |
+  *decoded = static_cast<uc16>((high << 4) | low); |
+  return true; |
+} |
+ |
+// Appends the result of decoding the escape that starts at |index| in |
+// |uri_content| to |buffer|. Normally that is the decoded character itself. |
+// When |is_uri| is set and |decoded| satisfies IsReservedPredicate, the escape |
+// is kept as written: '%' followed by the two original characters. |
+template <typename T> |
+void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index, |
+                 bool is_uri, List<T>* buffer) { |
+  if (!is_uri || !IsReservedPredicate(decoded)) { |
+    buffer->Add(decoded); |
+    return; |
+  } |
+ |
+  buffer->Add('%'); |
+  const uc16 high_digit = uri_content->Get(index + 1); |
+  const uc16 low_digit = uri_content->Get(index + 2); |
+  // The original digits must be representable in the buffer's element type. |
+  DCHECK_GT(std::numeric_limits<T>::max(), high_digit); |
+  DCHECK_GT(std::numeric_limits<T>::max(), low_digit); |
+ |
+  buffer->Add(high_digit); |
+  buffer->Add(low_digit); |
+} |
+ |
+// Decodes |uri_content| from position |index| up to |uri_length| into the |
+// two-byte |buffer|. An escape whose value exceeds kMaxOneByteChar is taken as |
+// the first octet of a UTF-8 sequence; the remaining octets must follow as |
+// further %XX escapes and the whole sequence is passed to DecodeOctets. |
+// Returns false on a truncated or invalid escape or an undecodable sequence. |
+bool IntoTwoByte(int index, bool is_uri, int uri_length, |
+                 String::FlatContent* uri_content, List<uc16>* buffer) { |
+  for (int pos = index; pos < uri_length; pos++) { |
+    const uc16 unit = uri_content->Get(pos); |
+    if (unit != '%') { |
+      buffer->Add(unit); |
+      continue; |
+    } |
+ |
+    // A '%' must be followed by two hex digits inside the string. |
+    uc16 lead; |
+    if (pos + 2 >= uri_length || !TwoDigitHex(pos, uri_content, &lead)) { |
+      return false; |
+    } |
+    const int escape_start = pos; |
+    pos += 2;  // Now on the last digit of the escape. |
+ |
+    if (lead <= unibrow::Utf8::kMaxOneByteChar) { |
+      AddToBuffer(lead, uri_content, escape_start, is_uri, buffer); |
+      continue; |
+    } |
+ |
+    uint8_t utf8[unibrow::Utf8::kMaxEncodedSize]; |
+    utf8[0] = lead; |
+ |
+    // Each set bit below the top bit of |lead| announces one more octet. |
+    // |octet_count| is the index of the octet being read and, once the loop |
+    // ends, the total number of octets collected. |
+    int octet_count = 1; |
+    for (; (lead << octet_count) & 0x80; octet_count++) { |
+      if (octet_count > 3 || pos + 3 >= uri_length) { |
+        return false; |
+      } |
+ |
+      pos++;  // Step onto the '%' of the next escape. |
+      uc16 continuation; |
+      if (uri_content->Get(pos) != '%' || |
+          !TwoDigitHex(pos, uri_content, &continuation)) { |
+        return false; |
+      } |
+      pos += 2; |
+      utf8[octet_count] = continuation; |
+    } |
+ |
+    if (!DecodeOctets(utf8, octet_count, buffer)) { |
+      return false; |
+    } |
+  } |
+  return true; |
+} |
+ |
+// Decodes |uri| into |one_byte_buffer| for as long as every character, raw or |
+// decoded from an escape, is at most kMaxOneByteChar. At the first position |
+// where that no longer holds, the rest of the string is handed to IntoTwoByte, |
+// which fills |two_byte_buffer|. Returns false on a truncated or invalid |
+// escape, or when IntoTwoByte fails. |
+bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri, |
+                       List<uint8_t>* one_byte_buffer, |
+                       List<uc16>* two_byte_buffer) { |
+  DisallowHeapAllocation no_gc; |
+  String::FlatContent uri_content = uri->GetFlatContent(); |
+ |
+  const int uri_length = uri->length(); |
+  for (int pos = 0; pos < uri_length; pos++) { |
+    const uc16 unit = uri_content.Get(pos); |
+ |
+    if (unit != '%') { |
+      if (unit > unibrow::Utf8::kMaxOneByteChar) { |
+        return IntoTwoByte(pos, is_uri, uri_length, &uri_content, |
+                           two_byte_buffer); |
+      } |
+      one_byte_buffer->Add(unit); |
+      continue; |
+    } |
+ |
+    // A '%' must be followed by two hex digits inside the string. |
+    uc16 decoded; |
+    if (pos + 2 >= uri_length || !TwoDigitHex(pos, &uri_content, &decoded)) { |
+      return false; |
+    } |
+ |
+    // Restart from the '%' so IntoTwoByte sees the whole escape sequence. |
+    if (decoded > unibrow::Utf8::kMaxOneByteChar) { |
+      return IntoTwoByte(pos, is_uri, uri_length, &uri_content, |
+                         two_byte_buffer); |
+    } |
+ |
+    AddToBuffer(decoded, &uri_content, pos, is_uri, one_byte_buffer); |
+    pos += 2;  // Skip the two digits; the loop increment skips the rest. |
+  } |
+  return true; |
+} |
+ |
+} // anonymous namespace |
+ |
+// Decodes the percent-escapes in |uri|; |is_uri| is forwarded to the decoding |
+// helpers. Throws a URIError if IntoOneAndTwoByte rejects the input. The |
+// result is a one-byte string when nothing was written to the two-byte |
+// buffer; otherwise it is a two-byte string made of the one-byte prefix |
+// followed by the two-byte remainder. |
+MaybeHandle<String> Uri::Decode(Isolate* isolate, Handle<String> uri, |
+                                bool is_uri) { |
+  uri = String::Flatten(uri); |
+  List<uint8_t> one_byte_buffer; |
+  List<uc16> two_byte_buffer; |
+ |
+  const bool decoded_ok = |
+      IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer); |
+  if (!decoded_ok) { |
+    THROW_NEW_ERROR(isolate, NewURIError(), String); |
+  } |
+ |
+  if (two_byte_buffer.is_empty()) { |
+    return isolate->factory()->NewStringFromOneByte( |
+        one_byte_buffer.ToConstVector()); |
+  } |
+ |
+  const int one_byte_length = one_byte_buffer.length(); |
+  const int two_byte_length = two_byte_buffer.length(); |
+ |
+  Handle<SeqTwoByteString> result; |
+  ASSIGN_RETURN_ON_EXCEPTION( |
+      isolate, result, |
+      isolate->factory()->NewRawTwoByteString(one_byte_length + |
+                                              two_byte_length), |
+      String); |
+ |
+  // Copy the one-byte prefix first, then the two-byte remainder after it. |
+  CopyChars(result->GetChars(), one_byte_buffer.ToConstVector().start(), |
+            one_byte_length); |
+  CopyChars(result->GetChars() + one_byte_length, |
+            two_byte_buffer.ToConstVector().start(), two_byte_length); |
+ |
+  return result; |
+} |
+ |
namespace { // anonymous namespace for EncodeURI helper functions |
bool IsUnescapePredicateInUriComponent(uc16 c) { |
if (IsAlphaNumeric(c)) { |
@@ -60,31 +241,23 @@ void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) { |
} |
void EncodeSingle(uc16 c, List<uint8_t>* buffer) { |
- uint8_t x = (c >> 12) & 0xF; |
- uint8_t y = (c >> 6) & 63; |
- uint8_t z = c & 63; |
- if (c <= 0x007F) { |
- AddHexEncodedToBuffer(c, buffer); |
- } else if (c <= 0x07FF) { |
- AddHexEncodedToBuffer(y + 192, buffer); |
- AddHexEncodedToBuffer(z + 128, buffer); |
- } else { |
- AddHexEncodedToBuffer(x + 224, buffer); |
- AddHexEncodedToBuffer(y + 128, buffer); |
- AddHexEncodedToBuffer(z + 128, buffer); |
+  // Let unibrow produce the UTF-8 octets for |c|, then pass each octet to |
+  // AddHexEncodedToBuffer in order. |
+  char utf8[4]; |
+  const int utf8_length = unibrow::Utf8::Encode( |
+      utf8, c, unibrow::Utf16::kNoPreviousCharacter, false); |
+  for (int i = 0; i < utf8_length; i++) { |
+    AddHexEncodedToBuffer(utf8[i], buffer); |
} |
} |
void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) { |
- uint8_t u = ((cc1 >> 6) & 0xF) + 1; |
- uint8_t w = (cc1 >> 2) & 0xF; |
- uint8_t x = cc1 & 3; |
- uint8_t y = (cc2 >> 6) & 0xF; |
- uint8_t z = cc2 & 63; |
- AddHexEncodedToBuffer((u >> 2) + 240, buffer); |
- AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer); |
- AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer); |
- AddHexEncodedToBuffer(z + 128, buffer); |
+  // Combine the surrogate pair (cc1, cc2) into one code point, let unibrow |
+  // produce its UTF-8 octets, and pass each to AddHexEncodedToBuffer in order. |
+  char utf8[4]; |
+  const int utf8_length = unibrow::Utf8::Encode( |
+      utf8, unibrow::Utf16::CombineSurrogatePair(cc1, cc2), |
+      unibrow::Utf16::kNoPreviousCharacter, false); |
+  for (int i = 0; i < utf8_length; i++) { |
+    AddHexEncodedToBuffer(utf8[i], buffer); |
+  } |
} |
} // anonymous namespace |