base/i18n/icu_string_conversions.cc - Issue 1141793003: Update from https://crrev.com/329939

Side by Side Diff: base/i18n/icu_string_conversions.cc

Issue 1141793003: Update from https://crrev.com/329939 (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/i18n/icu_string_conversions.h"	5 #include "base/i18n/icu_string_conversions.h"

6	6

7 #include <vector>	7 #include <vector>

8	8

9 #include "base/basictypes.h"	9 #include "base/basictypes.h"

10 #include "base/logging.h"	10 #include "base/logging.h"

(...skipping 116 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
127 break;	127 break;

128 case OnStringConversionError::SUBSTITUTE:	128 case OnStringConversionError::SUBSTITUTE:

129 ucnv_setToUCallBack(converter, ToUnicodeCallbackSubstitute, 0,	129 ucnv_setToUCallBack(converter, ToUnicodeCallbackSubstitute, 0,

130 NULL, NULL, status);	130 NULL, NULL, status);

131 break;	131 break;

132 default:	132 default:

133 NOTREACHED();	133 NOTREACHED();

134 }	134 }

135 }	135 }

136	136

137 inline UConverterType utf32_platform_endian() {

138 #if U_IS_BIG_ENDIAN

139 return UCNV_UTF32_BigEndian;

140 #else

141 return UCNV_UTF32_LittleEndian;

142 #endif

143 }

144

145 } // namespace	137 } // namespace

146	138

147 // Codepage <-> Wide/UTF-16 ---------------------------------------------------	139 // Codepage <-> Wide/UTF-16 ---------------------------------------------------

148	140

149 bool UTF16ToCodepage(const string16& utf16,	141 bool UTF16ToCodepage(const string16& utf16,

150 const char* codepage_name,	142 const char* codepage_name,

151 OnStringConversionError::Type on_error,	143 OnStringConversionError::Type on_error,

152 std::string* encoded) {	144 std::string* encoded) {

153 encoded->clear();	145 encoded->clear();

154	146

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
190 ucnv_close(converter);	182 ucnv_close(converter);

191 if (!U_SUCCESS(status)) {	183 if (!U_SUCCESS(status)) {

192 utf16->clear(); // Make sure the output is empty on error.	184 utf16->clear(); // Make sure the output is empty on error.

193 return false;	185 return false;

194 }	186 }

195	187

196 utf16->assign(buffer.get(), actual_size);	188 utf16->assign(buffer.get(), actual_size);

197 return true;	189 return true;

198 }	190 }

199	191

200 bool WideToCodepage(const std::wstring& wide,

201 const char* codepage_name,

202 OnStringConversionError::Type on_error,

203 std::string* encoded) {

204 #if defined(WCHAR_T_IS_UTF16)

205 return UTF16ToCodepage(wide, codepage_name, on_error, encoded);

206 #elif defined(WCHAR_T_IS_UTF32)

207 encoded->clear();

208

209 UErrorCode status = U_ZERO_ERROR;

210 UConverter* converter = ucnv_open(codepage_name, &status);

211 if (!U_SUCCESS(status))

212 return false;

213

214 int utf16_len;

215 // When wchar_t is wider than UChar (16 bits), transform |wide| into a

216 // UChar* string. Size the UChar* buffer to be large enough to hold twice

217 // as many UTF-16 code units (UChar's) as there are Unicode code points,

218 // in case each code point translates to a UTF-16 surrogate pair,

219 // and leave room for a NUL terminator.

220 std::vector<UChar> utf16(wide.length() * 2 + 1);

221 u_strFromUTF32(&utf16[0], utf16.size(), &utf16_len,

222 reinterpret_cast<const UChar32*>(wide.c_str()),

223 wide.length(), &status);

224 DCHECK(U_SUCCESS(status)) << "failed to convert wstring to UChar*";

225

226 return ConvertFromUTF16(converter, &utf16[0], utf16_len, on_error, encoded);

227 #endif // defined(WCHAR_T_IS_UTF32)

228 }

229

230 bool CodepageToWide(const std::string& encoded,

231 const char* codepage_name,

232 OnStringConversionError::Type on_error,

233 std::wstring* wide) {

234 #if defined(WCHAR_T_IS_UTF16)

235 return CodepageToUTF16(encoded, codepage_name, on_error, wide);

236 #elif defined(WCHAR_T_IS_UTF32)

237 wide->clear();

238

239 UErrorCode status = U_ZERO_ERROR;

240 UConverter* converter = ucnv_open(codepage_name, &status);

241 if (!U_SUCCESS(status))

242 return false;

243

244 // The maximum length in 4-byte units of UTF-32 output would be

245 // at most the same as the number of bytes in input. In the worst

246 // case of GB18030 (excluding escape-based encodings like ISO-2022-JP),

247 // this can be 4 times larger than actually needed.

248 size_t wchar_max_length = encoded.length() + 1;

249

250 SetUpErrorHandlerForToUChars(on_error, converter, &status);

251 scoped_ptr<wchar_t[]> buffer(new wchar_t[wchar_max_length]);

252 int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter,

253 reinterpret_cast<char*>(buffer.get()),

254 static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(),

255 static_cast<int>(encoded.length()), &status);

256 ucnv_close(converter);

257 if (!U_SUCCESS(status)) {

258 wide->clear(); // Make sure the output is empty on error.

259 return false;

260 }

261

262 // actual_size is # of bytes.

263 wide->assign(buffer.get(), actual_size / sizeof(wchar_t));

264 return true;

265 #endif // defined(WCHAR_T_IS_UTF32)

266 }

267

268 bool ConvertToUtf8AndNormalize(const std::string& text,	192 bool ConvertToUtf8AndNormalize(const std::string& text,

269 const std::string& charset,	193 const std::string& charset,

270 std::string* result) {	194 std::string* result) {

271 result->clear();	195 result->clear();

272 string16 utf16;	196 string16 utf16;

273 if (!CodepageToUTF16(	197 if (!CodepageToUTF16(

274 text, charset.c_str(), OnStringConversionError::FAIL, &utf16))	198 text, charset.c_str(), OnStringConversionError::FAIL, &utf16))

275 return false;	199 return false;

276	200

277 UErrorCode status = U_ZERO_ERROR;	201 UErrorCode status = U_ZERO_ERROR;

278 size_t max_length = utf16.length() + 1;	202 size_t max_length = utf16.length() + 1;

279 string16 normalized_utf16;	203 string16 normalized_utf16;

280 scoped_ptr<char16[]> buffer(new char16[max_length]);	204 scoped_ptr<char16[]> buffer(new char16[max_length]);

281 int actual_length = unorm_normalize(	205 int actual_length = unorm_normalize(

282 utf16.c_str(), utf16.length(), UNORM_NFC, 0,	206 utf16.c_str(), utf16.length(), UNORM_NFC, 0,

283 buffer.get(), static_cast<int>(max_length), &status);	207 buffer.get(), static_cast<int>(max_length), &status);

284 if (!U_SUCCESS(status))	208 if (!U_SUCCESS(status))

285 return false;	209 return false;

286 normalized_utf16.assign(buffer.get(), actual_length);	210 normalized_utf16.assign(buffer.get(), actual_length);

287	211

288 return UTF16ToUTF8(normalized_utf16.data(),	212 return UTF16ToUTF8(normalized_utf16.data(),

289 normalized_utf16.length(), result);	213 normalized_utf16.length(), result);

290 }	214 }

291	215

292 } // namespace base	216 } // namespace base

OLD	NEW

« no previous file with comments | « base/i18n/icu_string_conversions.h ('k') | base/i18n/icu_string_conversions_unittest.cc » ('j') | no next file with comments »