Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(922)

Unified Diff: src/uri.cc

Issue 1994733003: Rewrite decodeURL as builtin function, remove now unused runtime functions. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Use Utf8::Encode() and ValueOf() Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/uri.cc
diff --git a/src/uri.cc b/src/uri.cc
index c459be5e53f4ad915d669a0122cec77b1d1889e2..152327ea6d1584f1d0c208fce9c04c52ee4dd77c 100644
--- a/src/uri.cc
+++ b/src/uri.cc
@@ -60,31 +60,23 @@ void AddHexEncodedToBuffer(uint8_t octet, List<uint8_t>* buffer) {
}
void EncodeSingle(uc16 c, List<uint8_t>* buffer) {
- uint8_t x = (c >> 12) & 0xF;
- uint8_t y = (c >> 6) & 63;
- uint8_t z = c & 63;
- if (c <= 0x007F) {
- AddHexEncodedToBuffer(c, buffer);
- } else if (c <= 0x07FF) {
- AddHexEncodedToBuffer(y + 192, buffer);
- AddHexEncodedToBuffer(z + 128, buffer);
- } else {
- AddHexEncodedToBuffer(x + 224, buffer);
- AddHexEncodedToBuffer(y + 128, buffer);
- AddHexEncodedToBuffer(z + 128, buffer);
+  // Delegate the UTF-8 encoding of the single code unit to unibrow instead of
+  // hand-rolling the bit arithmetic, then forward every produced byte to
+  // AddHexEncodedToBuffer.
+  char utf8[4];
+  int utf8_length = unibrow::Utf8::Encode(
+      utf8, c, unibrow::Utf16::kNoPreviousCharacter, false);
+  for (int i = 0; i < utf8_length; i++) {
+    AddHexEncodedToBuffer(utf8[i], buffer);
}
}
void EncodePair(uc16 cc1, uc16 cc2, List<uint8_t>* buffer) {
- uint8_t u = ((cc1 >> 6) & 0xF) + 1;
- uint8_t w = (cc1 >> 2) & 0xF;
- uint8_t x = cc1 & 3;
- uint8_t y = (cc2 >> 6) & 0xF;
- uint8_t z = cc2 & 63;
- AddHexEncodedToBuffer((u >> 2) + 240, buffer);
- AddHexEncodedToBuffer((((u & 3) << 4) | w) + 128, buffer);
- AddHexEncodedToBuffer(((x << 4) | y) + 128, buffer);
- AddHexEncodedToBuffer(z + 128, buffer);
+  // The two surrogates are first combined into one code point; unibrow then
+  // produces its UTF-8 bytes, each of which is forwarded to
+  // AddHexEncodedToBuffer.
+  char utf8[4];
+  int utf8_length = unibrow::Utf8::Encode(
+      utf8, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),
+      unibrow::Utf16::kNoPreviousCharacter, false);
+  for (int i = 0; i < utf8_length; i++) {
+    AddHexEncodedToBuffer(utf8[i], buffer);
+  }
}
} // anonymous namespace
@@ -131,5 +123,178 @@ Object* Uri::Encode(Isolate* isolate, Handle<String> uri, bool is_uri) {
return *result;
}
+namespace { // anonymous namespace for DecodeURI helper functions
+
+// Returns true when |c| is one of the characters # $ & + , / : ; = ? @ and
+// false for every other code unit.
+bool IsReservedPredicate(uc16 c) {
+  return c == '#' || c == '$' || c == '&' || c == '+' || c == ',' ||
+         c == '/' || c == ':' || c == ';' || c == '=' || c == '?' ||
+         c == '@';
+}
+
+// Returns true when |octets| holds exactly the three bytes EF BF BD, which is
+// the UTF-8 encoding of U+FFFD (written %ef%bf%bd in escaped form).
+bool IsReplacementCharacter(List<uint8_t>* octets) {
+  return octets->length() == 3 && octets->at(0) == 0xef &&
+         octets->at(1) == 0xbf && octets->at(2) == 0xbd;
+}
+
+// Decodes the byte sequence in |octets| with unibrow::Utf8::ValueOf and
+// appends the result to |two_byte_buffer|: a single code unit when the value
+// does not exceed kMaxNonSurrogateCharCode, otherwise a lead/trail surrogate
+// pair. Returns false, appending nothing, when the decoder reports kBadChar
+// for input that is not itself the encoded replacement character.
+bool DecodeOctets(List<uint8_t>* octets, List<uc16>* two_byte_buffer) {
+  size_t cursor = 0;
+  uc32 value = unibrow::Utf8::ValueOf(octets->ToConstVector().start(),
+                                      octets->length(), &cursor);
+  // kBadChar is the Replacement Character, which is also the legitimate
+  // decoding of the valid input %ef%bf%bd, so that one input must not be
+  // treated as a failure.
+  if (value == unibrow::Utf8::kBadChar && !IsReplacementCharacter(octets)) {
+    return false;
+  }
+
+  if (value <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
+    two_byte_buffer->Add(value);
+  } else {
+    two_byte_buffer->Add(unibrow::Utf16::LeadSurrogate(value));
+    two_byte_buffer->Add(unibrow::Utf16::TrailSurrogate(value));
+  }
+  return true;
+}
+
+// Interprets the characters at indices |k| + 1 and |k| + 2 of |uri_content|
+// as two hexadecimal digits and stores the combined value in |decoded|.
+// Returns false, leaving |decoded| untouched, when HexValue reports either
+// character as invalid. The caller is responsible for making sure that
+// |k| + 2 is a valid index.
+bool TwoDigitHex(uc16& decoded, int k, String::FlatContent* uri_content) {
+  // Hold the results in int rather than char: whether plain char is signed is
+  // implementation-defined, and where it is unsigned a negative "not a hex
+  // digit" result would wrap to a positive value and slip past the < 0 check.
+  // NOTE(review): relies on HexValue signalling failure with a negative
+  // value, as the check below implies - confirm against its declaration.
+  int high = HexValue(uri_content->Get(k + 1));
+  int low = HexValue(uri_content->Get(k + 2));
+  if (high < 0 || low < 0) {
+    return false;
+  }
+  decoded = (high << 4) | low;
+  return true;
+}
+
+// Appends |decoded| to |buffer|. The exception is a reserved character while
+// |is_uri| is set: in that case a '%' followed by the source characters at
+// indices |k| + 1 and |k| + 2 of |uri_content| is appended instead, so the
+// escape is carried over rather than decoded.
+template <typename T>
+void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int k,
+                 bool is_uri, List<T>* buffer) {
+  if (!is_uri || !IsReservedPredicate(decoded)) {
+    buffer->Add(decoded);
+    return;
+  }
+  buffer->Add('%');
+  buffer->Add(uri_content->Get(k + 1));
+  buffer->Add(uri_content->Get(k + 2));
+}
+
+// Decodes |uri_content| from position |index| up to |uri_length|, appending
+// the output to |two_byte_buffer|. Returns false as soon as a malformed
+// escape or an undecodable octet sequence is found.
+bool IntoTwoByte(int index, bool is_uri, int uri_length,
+                 String::FlatContent* uri_content,
+                 List<uc16>* two_byte_buffer) {
+  for (int pos = index; pos < uri_length; pos++) {
+    uc16 unit = uri_content->Get(pos);
+    if (unit != '%') {
+      // Anything that is not an escape is copied through unchanged.
+      two_byte_buffer->Add(unit);
+      continue;
+    }
+
+    // An escape needs two more characters, and both must be hex digits.
+    uc16 lead;
+    if (pos + 2 >= uri_length || !TwoDigitHex(lead, pos, uri_content)) {
+      return false;
+    }
+    pos += 2;  // |pos| now sits on the second hex digit of this escape.
+
+    if (lead <= unibrow::Utf8::kMaxOneByteChar) {
+      // AddToBuffer expects the index of the '%', hence pos - 2.
+      AddToBuffer(lead, uri_content, pos - 2, is_uri, two_byte_buffer);
+      continue;
+    }
+
+    // The number of leading 1 bits in the first octet is the total number of
+    // octets that make up the sequence.
+    int octet_count = 1;
+    while (((lead << octet_count) & 0x80) != 0) {
+      octet_count++;
+    }
+    // Reject a first octet with exactly one or more than four leading 1 bits,
+    // and input too short to hold the remaining "%XX" escapes.
+    if (octet_count == 1 || octet_count > 4 ||
+        pos + 3 * (octet_count - 1) >= uri_length) {
+      return false;
+    }
+
+    List<uint8_t> octets;
+    octets.Add(lead);
+    for (int remaining = octet_count - 1; remaining > 0; remaining--) {
+      uc16 trail;
+      pos++;
+      if (uri_content->Get(pos) != '%' || pos + 2 >= uri_length ||
+          !TwoDigitHex(trail, pos, uri_content)) {
+        return false;
+      }
+      pos += 2;
+      octets.Add(trail);
+    }
+
+    if (!DecodeOctets(&octets, two_byte_buffer)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Decodes |uri| into |one_byte_buffer| for as long as every produced
+// character is at most unibrow::Utf8::kMaxOneByteChar. At the first character
+// or escape above that limit the rest of the input, starting at that
+// position, is handed to IntoTwoByte, which writes to |two_byte_buffer|.
+// Returns false on a malformed escape or when IntoTwoByte fails.
+bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,
+                       List<uint8_t>* one_byte_buffer,
+                       List<uc16>* two_byte_buffer) {
+  DisallowHeapAllocation no_gc;
+  String::FlatContent uri_content = uri->GetFlatContent();
+
+  int uri_length = uri->length();
+  for (int pos = 0; pos < uri_length; pos++) {
+    uc16 unit = uri_content.Get(pos);
+
+    if (unit != '%') {
+      if (unit > unibrow::Utf8::kMaxOneByteChar) {
+        return IntoTwoByte(pos, is_uri, uri_length, &uri_content,
+                           two_byte_buffer);
+      }
+      one_byte_buffer->Add(unit);
+      continue;
+    }
+
+    // An escape needs two more characters, and both must be hex digits.
+    uc16 decoded;
+    if (pos + 2 >= uri_length || !TwoDigitHex(decoded, pos, &uri_content)) {
+      return false;
+    }
+
+    if (decoded > unibrow::Utf8::kMaxOneByteChar) {
+      // |pos| still points at the '%', so IntoTwoByte re-reads this escape.
+      return IntoTwoByte(pos, is_uri, uri_length, &uri_content,
+                         two_byte_buffer);
+    }
+
+    AddToBuffer(decoded, &uri_content, pos, is_uri, one_byte_buffer);
+    pos += 2;  // Skip the two hex digits that were just consumed.
+  }
+  return true;
+}
+
+} // anonymous namespace
+
+// Flattens |uri|, decodes it with IntoOneAndTwoByte and returns the cons
+// string built from the one-byte part followed by the two-byte part. Throws
+// a URIError when decoding fails.
+Object* Uri::Decode(Isolate* isolate, Handle<String> uri, bool is_uri) {
+  uri = String::Flatten(uri);
+
+  List<uint8_t> one_byte_part;
+  List<uc16> two_byte_part;
+  bool decoded_ok =
+      IntoOneAndTwoByte(uri, is_uri, &one_byte_part, &two_byte_part);
+  if (!decoded_ok) {
+    THROW_NEW_ERROR_RETURN_FAILURE(isolate, NewURIError());
+  }
+
+  // Turn each buffer into a string, then join them in input order.
+  Handle<String> head = isolate->factory()->InternalizeOneByteString(
+      one_byte_part.ToConstVector());
+  Handle<String> tail = isolate->factory()->InternalizeTwoByteString(
+      two_byte_part.ToConstVector());
+
+  Handle<String> joined;
+  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+      isolate, joined, isolate->factory()->NewConsString(head, tail));
+
+  return *joined;
+}
+
} // namespace internal
} // namespace v8
« src/uri.h ('K') | « src/uri.h ('k') | test/cctest/compiler/test-run-intrinsics.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698