| Index: src/url_canon_icu.cc | 
| =================================================================== | 
| --- src/url_canon_icu.cc	(revision 104) | 
| +++ src/url_canon_icu.cc	(working copy) | 
| @@ -161,4 +161,47 @@ | 
| } | 
| } | 
|  | 
| +// Decodes the UTF-8 character at |str[*begin]|, leaving |*begin| on the last | 
| +// char consumed. Returns false (and the replacement character) if invalid. | 
| +bool ReadUTFChar(const char* str, int* begin, int length, | 
| +                 unsigned* code_point_out) { | 
| +  // Signed temporary, because U8_NEXT can write -1 to it. | 
| +  int decoded; | 
| +  U8_NEXT(str, *begin, length, decoded); | 
| + | 
| +  // Step back: U8_NEXT leaves |*begin| one past the last char consumed. | 
| +  --*begin; | 
| + | 
| +  const bool valid = U_IS_UNICODE_CHAR(decoded); | 
| +  *code_point_out = | 
| +      valid ? static_cast<unsigned>(decoded) : kUnicodeReplacementCharacter; | 
| +  return valid; | 
| +} | 
| + | 
| +// Decodes the UTF-16 character at |str[*begin]|, leaving |*begin| on the last | 
| +// word consumed. Returns false (and the replacement character) if invalid. | 
| +bool ReadUTFChar(const char16* str, int* begin, int length, | 
| +                 unsigned* code_point) { | 
| +  const char16 first = str[*begin]; | 
| +  if (!U16_IS_SURROGATE(first)) { | 
| +    // A lone 16-bit word encodes the character directly. | 
| +    *code_point = first; | 
| +  } else if (U16_IS_SURROGATE_LEAD(first) && *begin + 1 < length && | 
| +             U16_IS_TRAIL(str[*begin + 1])) { | 
| +    // Well-formed pair: combine both words and consume the trail word too. | 
| +    *code_point = U16_GET_SUPPLEMENTARY(first, str[*begin + 1]); | 
| +    ++*begin; | 
| +  } else { | 
| +    // Unpaired or out-of-order surrogate. | 
| +    *code_point = kUnicodeReplacementCharacter; | 
| +    return false; | 
| +  } | 
| +  if (!U_IS_UNICODE_CHAR(*code_point)) { | 
| +    // Decoded fine, but the value is not a valid Unicode character. | 
| +    *code_point = kUnicodeReplacementCharacter; | 
| +    return false; | 
| +  } | 
| +  return true; | 
| +} | 
| + | 
| }  // namespace url_canon | 
|  |