| Index: src/url_canon_icu.cc
|
| ===================================================================
|
| --- src/url_canon_icu.cc (revision 104)
|
| +++ src/url_canon_icu.cc (working copy)
|
| @@ -161,4 +161,47 @@
|
| }
|
| }
|
|
|
| +bool ReadUTFChar(const char* str, int* begin, int length,
|
| + unsigned* code_point_out) {
|
| + int code_point; // Signed local: avoids a warning when U8_NEXT writes -1.
|
| + U8_NEXT(str, *begin, length, code_point);
|
| + *code_point_out = static_cast<unsigned>(code_point);
|
| +
|
| + // U8_NEXT leaves *begin on the next char; step back one so that *begin
|
| + // refers to the last char consumed.
|
| + (*begin)--;
|
| +
|
| + // Return false with kUnicodeReplacementCharacter unless the value is valid.
|
| + if (U_IS_UNICODE_CHAR(code_point))
|
| + return true;
|
| + *code_point_out = kUnicodeReplacementCharacter;
|
| + return false;
|
| +}
|
| +
|
| +bool ReadUTFChar(const char16* str, int* begin, int length,
|
| + unsigned* code_point) {
|
| + if (U16_IS_SURROGATE(str[*begin])) {
|
| + if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length ||
|
| + !U16_IS_TRAIL(str[*begin + 1])) {
|
| + // Not a lead, or no trail unit follows within length; *begin is unchanged.
|
| + *code_point = kUnicodeReplacementCharacter;
|
| + return false;
|
| + } else {
|
| + // Lead followed by trail: combine them and move *begin onto the trail.
|
| + *code_point = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]);
|
| + (*begin)++;
|
| + }
|
| + } else {
|
| + // Not a surrogate: the single 16-bit word is used as the code point.
|
| + *code_point = str[*begin];
|
| + }
|
| +
|
| + if (U_IS_UNICODE_CHAR(*code_point))
|
| + return true;
|
| +
|
| + // U_IS_UNICODE_CHAR rejected the value; substitute the replacement char.
|
| + *code_point = kUnicodeReplacementCharacter;
|
| + return false;
|
| +}
|
| +
|
| } // namespace url_canon
|
|
|