| Index: src/url_util.cc
|
| ===================================================================
|
| --- src/url_util.cc (revision 137)
|
| +++ src/url_util.cc (working copy)
|
| @@ -33,6 +33,7 @@
|
| #include "googleurl/src/url_util.h"
|
|
|
| #include "base/logging.h"
|
| +#include "googleurl/src/url_canon_internal.h"
|
| #include "googleurl/src/url_file.h"
|
|
|
| namespace url_util {
|
| @@ -500,4 +501,53 @@
|
| return DoLowerCaseEqualsASCII(a_begin, a_end, b);
|
| }
|
|
|
| +void DecodeURLEscapeSequences(const char* input, int length,
|
| + url_canon::CanonOutputW* output) {
|
| + url_canon::RawCanonOutputT<char> unescaped_chars;
|
| + for (int i = 0; i < length; i++) {
|
| + if (input[i] == '%') {
|
| + unsigned char ch;
|
| + if (url_canon::DecodeEscaped(input, &i, length, &ch)) {
|
| + unescaped_chars.push_back(ch);
|
| + } else {
|
| + // Invalid escape sequence, copy the percent literal.
|
| + unescaped_chars.push_back('%');
|
| + }
|
| + } else {
|
| + // Regular non-escaped 8-bit character.
|
| + unescaped_chars.push_back(input[i]);
|
| + }
|
| + }
|
| +
|
| + // Convert that 8-bit to UTF-16. It's not clear IE does this at all to
|
| + // JavaScript URLs, but Firefox and Safari do.
|
| + for (int i = 0; i < unescaped_chars.length(); i++) {
|
| + unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));
|
| + if (uch < 0x80) {
|
| + // Non-UTF-8, just append directly
|
| + output->push_back(uch);
|
| + } else {
|
| + // next_ch will point to the last character of the decoded
|
| + // character.
|
| + int next_character = i;
|
| + unsigned code_point;
|
| + if (url_canon::ReadUTFChar(unescaped_chars.data(), &next_character,
|
| + unescaped_chars.length(), &code_point)) {
|
| + // Valid UTF-8 character, convert to UTF-16.
|
| + url_canon::AppendUTF16Value(code_point, output);
|
| + i = next_character;
|
| + } else {
|
| + // If there are any sequences that are not valid UTF-8, we keep
|
| + // invalid code points and promote to UTF-16. We copy all characters
|
| + // from the current position to the end of the identified sequence.
|
| + while (i < next_character) {
|
| + output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));
|
| + i++;
|
| + }
|
| + output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));
|
| + }
|
| + }
|
| + }
|
| +}
|
| +
|
| } // namespace url_util
|
|
|