| Index: base/string_util.h
|
| ===================================================================
|
| --- base/string_util.h (revision 19007)
|
| +++ base/string_util.h (working copy)
|
| @@ -186,6 +186,13 @@
|
| // do the best it can and put the result in the output buffer. The versions that
|
| // return strings ignore this error and just return the best conversion
|
| // possible.
|
| +//
|
| +// Note that only the structural validity is checked and non-character
|
| +// codepoints and unassigned codepoints are regarded as valid.
|
| +// TODO(jungshik): Consider replacing an invalid input sequence with
|
| +// the Unicode replacement character or adding |replacement_char| parameter.
|
| +// Currently, it's skipped in the output, which could be problematic in
|
| +// some situations.
|
| bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output);
|
| std::string WideToUTF8(const std::wstring& wide);
|
| bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output);
|
| @@ -250,6 +257,13 @@
|
| // string be 8-bit or UTF8? It contains only characters that are < 256 (in the
|
| // first case) or characters that use only 8-bits and whose 8-bit
|
| // representation looks like a UTF-8 string (the second case).
|
| +//
|
| +// Note that IsStringUTF8 checks not only if the input is structurally
|
| +// valid but also if it doesn't contain any non-character codepoint
|
| +// (e.g. U+FFFE). It's done on purpose because all the existing callers want
|
| +// to have the maximum 'discriminating' power from other encodings. If
|
| +// there's a use case for just checking the structural validity, we have to
|
| +// add a new function for that.
|
| bool IsString8Bit(const std::wstring& str);
|
| bool IsStringUTF8(const std::string& str);
|
| bool IsStringWideUTF8(const std::wstring& str);
|
|
|