Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(110)

Unified Diff: src/uri.cc

Issue 2027003002: [builtins] Migrate escape/unescape from uri.js to C++. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Minor refactoring Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/uri.h ('k') | src/v8.gyp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/uri.cc
diff --git a/src/uri.cc b/src/uri.cc
index 2736b2d74b744bf045cb33e30d6412c39b30e4d8..0107721888a809f624dbf5a3031f45fff3cdee6a 100644
--- a/src/uri.cc
+++ b/src/uri.cc
@@ -8,6 +8,7 @@
#include "src/handles.h"
#include "src/isolate-inl.h"
#include "src/list.h"
+#include "src/string-search.h"
namespace v8 {
namespace internal {
@@ -59,14 +60,14 @@ bool DecodeOctets(const uint8_t* octets, int length, List<uc16>* buffer) {
return true;
}
-bool TwoDigitHex(int index, String::FlatContent* uri_content, uc16* decoded) {
- char high = HexValue(uri_content->Get(index + 1));
- char low = HexValue(uri_content->Get(index + 2));
- if (high < 0 || low < 0) {
- return false;
- }
- *decoded = (high << 4) | low;
- return true;
+int TwoDigitHex(uc16 character1, uc16 character2) {
+ if (character1 > 'f') return -1;
+ int high = HexValue(character1);
+ if (high == -1) return -1;
+ if (character2 > 'f') return -1;
+ int low = HexValue(character2);
+ if (low == -1) return -1;
+ return (high << 4) + low;
}
template <typename T>
@@ -92,7 +93,9 @@ bool IntoTwoByte(int index, bool is_uri, int uri_length,
uc16 code = uri_content->Get(k);
if (code == '%') {
uc16 decoded;
- if (k + 2 >= uri_length || !TwoDigitHex(k, uri_content, &decoded)) {
+ if (k + 2 >= uri_length ||
+ (decoded = TwoDigitHex(uri_content->Get(k + 1),
+ uri_content->Get(k + 2))) < 0) {
return false;
}
k += 2;
@@ -109,7 +112,8 @@ bool IntoTwoByte(int index, bool is_uri, int uri_length,
uc16 continuation_byte;
if (uri_content->Get(++k) != '%' ||
- !TwoDigitHex(k, uri_content, &continuation_byte)) {
+ (continuation_byte = TwoDigitHex(uri_content->Get(k + 1),
+ uri_content->Get(k + 2))) < 0) {
return false;
}
k += 2;
@@ -140,7 +144,9 @@ bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
uc16 code = uri_content.Get(k);
if (code == '%') {
uc16 decoded;
- if (k + 2 >= uri_length || !TwoDigitHex(k, &uri_content, &decoded)) {
+ if (k + 2 >= uri_length ||
+ (decoded = TwoDigitHex(uri_content.Get(k + 1),
+ uri_content.Get(k + 2))) < 0) {
return false;
}
@@ -234,29 +240,29 @@ bool IsUriSeparator(uc16 c) {
}
}
-void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {
+void AddEncodedOctetToBuffer(uint8_t octet, List<uint8_t>* buffer) {
buffer->Add('%');
buffer->Add(HexCharOfValue(octet >> 4));
buffer->Add(HexCharOfValue(octet & 0x0F));
}
void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
- char s[4];
+ char s[4] = {};
int number_of_bytes;
number_of_bytes =
unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
for (int k = 0; k < number_of_bytes; k++) {
- AddHexEncodedToBuffer(s[k], buffer);
+ AddEncodedOctetToBuffer(s[k], buffer);
}
}
void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
- char s[4];
+ char s[4] = {};
int number_of_bytes =
unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
unibrow::Utf16::kNoPreviousCharacter, false);
for (int k = 0; k < number_of_bytes; k++) {
- AddHexEncodedToBuffer(s[k], buffer);
+ AddEncodedOctetToBuffer(s[k], buffer);
}
}
@@ -301,5 +307,199 @@ MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri,
return isolate->factory()->NewStringFromOneByte(buffer.ToConstVector());
}
+namespace { // Anonymous namespace for Escape and Unescape
+
+template <typename Char>
+int UnescapeChar(Vector<const Char> vector, int i, int length, int* step) {
+ uint16_t character = vector[i];
+ int32_t hi = 0;
+ int32_t lo = 0;
+ if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&
+ (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) > -1 &&
+ (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) > -1) {
+ *step = 6;
+ return (hi << 8) + lo;
+ } else if (character == '%' && i <= length - 3 &&
+ (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) > -1) {
+ *step = 3;
+ return lo;
+ } else {
+ *step = 1;
+ return character;
+ }
+}
+
+template <typename Char>
+MaybeHandle<String> UnescapeSlow(Isolate* isolate, Handle<String> string,
+ int start_index) {
+ bool one_byte = true;
+ int length = string->length();
+
+ int unescaped_length = 0;
+ {
+ DisallowHeapAllocation no_allocation;
+ Vector<const Char> vector = string->GetCharVector<Char>();
+ for (int i = start_index; i < length; unescaped_length++) {
+ int step;
+ if (UnescapeChar(vector, i, length, &step) >
+ String::kMaxOneByteCharCode) {
+ one_byte = false;
+ }
+ i += step;
+ }
+ }
+
+ DCHECK(start_index < length);
+ Handle<String> first_part =
+ isolate->factory()->NewProperSubString(string, 0, start_index);
+
+ int dest_position = 0;
+ Handle<String> second_part;
+ DCHECK(unescaped_length <= String::kMaxLength);
+ if (one_byte) {
+ Handle<SeqOneByteString> dest = isolate->factory()
+ ->NewRawOneByteString(unescaped_length)
+ .ToHandleChecked();
+ DisallowHeapAllocation no_allocation;
+ Vector<const Char> vector = string->GetCharVector<Char>();
+ for (int i = start_index; i < length; dest_position++) {
+ int step;
+ dest->SeqOneByteStringSet(dest_position,
+ UnescapeChar(vector, i, length, &step));
+ i += step;
+ }
+ second_part = dest;
+ } else {
+ Handle<SeqTwoByteString> dest = isolate->factory()
+ ->NewRawTwoByteString(unescaped_length)
+ .ToHandleChecked();
+ DisallowHeapAllocation no_allocation;
+ Vector<const Char> vector = string->GetCharVector<Char>();
+ for (int i = start_index; i < length; dest_position++) {
+ int step;
+ dest->SeqTwoByteStringSet(dest_position,
+ UnescapeChar(vector, i, length, &step));
+ i += step;
+ }
+ second_part = dest;
+ }
+ return isolate->factory()->NewConsString(first_part, second_part);
+}
+
+bool IsNotEscaped(uint16_t c) {
+ if (IsAlphaNumeric(c)) {
+ return true;
+ }
+ // @*_+-./
+ switch (c) {
+ case '@':
+ case '*':
+ case '_':
+ case '+':
+ case '-':
+ case '.':
+ case '/':
+ return true;
+ default:
+ return false;
+ }
+}
+
+template <typename Char>
+static MaybeHandle<String> UnescapePrivate(Isolate* isolate,
+ Handle<String> source) {
+ int index;
+ {
+ DisallowHeapAllocation no_allocation;
+ StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
+ index = search.Search(source->GetCharVector<Char>(), 0);
+ if (index < 0) return source;
+ }
+ return UnescapeSlow<Char>(isolate, source, index);
+}
+
+template <typename Char>
+static MaybeHandle<String> EscapePrivate(Isolate* isolate,
+ Handle<String> string) {
+ DCHECK(string->IsFlat());
+ int escaped_length = 0;
+ int length = string->length();
+
+ {
+ DisallowHeapAllocation no_allocation;
+ Vector<const Char> vector = string->GetCharVector<Char>();
+ for (int i = 0; i < length; i++) {
+ uint16_t c = vector[i];
+ if (c >= 256) {
+ escaped_length += 6;
+ } else if (IsNotEscaped(c)) {
+ escaped_length++;
+ } else {
+ escaped_length += 3;
+ }
+
+ // We don't allow strings that are longer than a maximal length.
+ DCHECK(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
+ if (escaped_length > String::kMaxLength) break; // Provoke exception.
+ }
+ }
+
+ // No length change implies no change. Return original string if no change.
+ if (escaped_length == length) return string;
+
+ Handle<SeqOneByteString> dest;
+ ASSIGN_RETURN_ON_EXCEPTION(
+ isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
+ String);
+ int dest_position = 0;
+
+ {
+ DisallowHeapAllocation no_allocation;
+ Vector<const Char> vector = string->GetCharVector<Char>();
+ for (int i = 0; i < length; i++) {
+ uint16_t c = vector[i];
+ if (c >= 256) {
+ dest->SeqOneByteStringSet(dest_position, '%');
+ dest->SeqOneByteStringSet(dest_position + 1, 'u');
+ dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c >> 12));
+ dest->SeqOneByteStringSet(dest_position + 3,
+ HexCharOfValue((c >> 8) & 0xf));
+ dest->SeqOneByteStringSet(dest_position + 4,
+ HexCharOfValue((c >> 4) & 0xf));
+ dest->SeqOneByteStringSet(dest_position + 5, HexCharOfValue(c & 0xf));
+ dest_position += 6;
+ } else if (IsNotEscaped(c)) {
+ dest->SeqOneByteStringSet(dest_position, c);
+ dest_position++;
+ } else {
+ dest->SeqOneByteStringSet(dest_position, '%');
+ dest->SeqOneByteStringSet(dest_position + 1, HexCharOfValue(c >> 4));
+ dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c & 0xf));
+ dest_position += 3;
+ }
+ }
+ }
+
+ return dest;
+}
+
+} // Anonymous namespace
+
+MaybeHandle<String> Uri::Escape(Isolate* isolate, Handle<String> string) {
+ Handle<String> result;
+ string = String::Flatten(string);
+ return string->IsOneByteRepresentationUnderneath()
+ ? EscapePrivate<uint8_t>(isolate, string)
+ : EscapePrivate<uc16>(isolate, string);
+}
+
+MaybeHandle<String> Uri::Unescape(Isolate* isolate, Handle<String> string) {
+ Handle<String> result;
+ string = String::Flatten(string);
+ return string->IsOneByteRepresentationUnderneath()
+ ? UnescapePrivate<uint8_t>(isolate, string)
+ : UnescapePrivate<uc16>(isolate, string);
+}
+
} // namespace internal
} // namespace v8
« no previous file with comments | « src/uri.h ('k') | src/v8.gyp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698