Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextCodecICU.cpp

Issue 2373983006: reflow comments in wtf/text (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
81 // Try IANA to pick up 'windows-12xx' and other names 81 // Try IANA to pick up 'windows-12xx' and other names
82 // which are not preferred MIME names but are widely used. 82 // which are not preferred MIME names but are widely used.
83 standardName = ucnv_getStandardName(name, secondaryStandard, &error); 83 standardName = ucnv_getStandardName(name, secondaryStandard, &error);
84 if (U_FAILURE(error) || !standardName) 84 if (U_FAILURE(error) || !standardName)
85 continue; 85 continue;
86 } 86 }
87 87
88 // A number of these aliases are handled in Chrome's copy of ICU, but 88 // A number of these aliases are handled in Chrome's copy of ICU, but
89 // Chromium can be compiled with the system ICU. 89 // Chromium can be compiled with the system ICU.
90 90
91 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers. 91 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other
92 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding 92 // browsers.
93 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too. 93 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native
94 // encoding for encoding GB_2312-80 and several others. So, we need to
95 // override this behavior, too.
94 #if defined(USING_SYSTEM_ICU) 96 #if defined(USING_SYSTEM_ICU)
95 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80")) 97 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80"))
96 standardName = "GBK"; 98 standardName = "GBK";
97 // Similarly, EUC-KR encodings all map to an extended version, but 99 // Similarly, EUC-KR encodings all map to an extended version, but
98 // per HTML5, the canonical name still should be EUC-KR. 100 // per HTML5, the canonical name still should be EUC-KR.
99 else if (!strcmp(standardName, "EUC-KR") || 101 else if (!strcmp(standardName, "EUC-KR") ||
100 !strcmp(standardName, "KSC_5601") || 102 !strcmp(standardName, "KSC_5601") ||
101 !strcmp(standardName, "cp1363")) 103 !strcmp(standardName, "cp1363"))
102 standardName = "EUC-KR"; 104 standardName = "EUC-KR";
103 // And so on. 105 // And so on.
104 else if ( 106 else if (!strcasecmp(standardName, "iso-8859-9"))
105 !strcasecmp( 107 // This name is returned in different case by ICU 3.2 and 3.6.
106 standardName,
107 "iso-8859-9")) // This name is returned in different case by ICU 3.2 and 3.6.
108 standardName = "windows-1254"; 108 standardName = "windows-1254";
109 else if (!strcmp(standardName, "TIS-620")) 109 else if (!strcmp(standardName, "TIS-620"))
110 standardName = "windows-874"; 110 standardName = "windows-874";
111 #endif 111 #endif
112 112
113 registrar(standardName, standardName); 113 registrar(standardName, standardName);
114 114
115 uint16_t numAliases = ucnv_countAliases(name, &error); 115 uint16_t numAliases = ucnv_countAliases(name, &error);
116 ASSERT(U_SUCCESS(error)); 116 ASSERT(U_SUCCESS(error));
117 if (U_SUCCESS(error)) 117 if (U_SUCCESS(error))
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
175 registrar("ISO8859-6", "ISO-8859-6"); 175 registrar("ISO8859-6", "ISO-8859-6");
176 registrar("ISO8859-7", "ISO-8859-7"); 176 registrar("ISO8859-7", "ISO-8859-7");
177 registrar("ISO8859-8", "ISO-8859-8"); 177 registrar("ISO8859-8", "ISO-8859-8");
178 registrar("ISO8859-8-I", "ISO-8859-8-I"); 178 registrar("ISO8859-8-I", "ISO-8859-8-I");
179 registrar("ISO8859-9", "ISO-8859-9"); 179 registrar("ISO8859-9", "ISO-8859-9");
180 registrar("ISO8859-10", "ISO-8859-10"); 180 registrar("ISO8859-10", "ISO-8859-10");
181 registrar("ISO8859-13", "ISO-8859-13"); 181 registrar("ISO8859-13", "ISO-8859-13");
182 registrar("ISO8859-14", "ISO-8859-14"); 182 registrar("ISO8859-14", "ISO-8859-14");
183 registrar("ISO8859-15", "ISO-8859-15"); 183 registrar("ISO8859-15", "ISO-8859-15");
184 // No need to have an entry for ISO8859-16. ISO-8859-16 has just one label 184 // No need to have an entry for ISO8859-16. ISO-8859-16 has just one label
185 // listed in WHATWG Encoding Living Standard (http://encoding.spec.whatwg.org/). 185 // listed in WHATWG Encoding Living Standard, http://encoding.spec.whatwg.org/
186 186
187 // Additional aliases present in the WHATWG Encoding Standard 187 // Additional aliases present in the WHATWG Encoding Standard
188 // and Firefox (as of Oct 2014), but not in the upstream ICU. 188 // and Firefox (as of Oct 2014), but not in the upstream ICU.
189 // Three entries for windows-1252 need not be listed here because 189 // Three entries for windows-1252 need not be listed here because
190 // TextCodecLatin1 registers them. 190 // TextCodecLatin1 registers them.
191 registrar("csiso58gb231280", "GBK"); 191 registrar("csiso58gb231280", "GBK");
192 registrar("csiso88596e", "ISO-8859-6"); 192 registrar("csiso88596e", "ISO-8859-6");
193 registrar("csiso88596i", "ISO-8859-6"); 193 registrar("csiso88596i", "ISO-8859-6");
194 registrar("csiso88598e", "ISO-8859-8"); 194 registrar("csiso88598e", "ISO-8859-8");
195 registrar("gb_2312", "GBK"); 195 registrar("gb_2312", "GBK");
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
372 int32_t* offsets = nullptr; 372 int32_t* offsets = nullptr;
373 UErrorCode err = U_ZERO_ERROR; 373 UErrorCode err = U_ZERO_ERROR;
374 374
375 do { 375 do {
376 int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit, 376 int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit,
377 offsets, flush != DoNotFlush, err); 377 offsets, flush != DoNotFlush, err);
378 result.append(buffer, ucharsDecoded); 378 result.append(buffer, ucharsDecoded);
379 } while (err == U_BUFFER_OVERFLOW_ERROR); 379 } while (err == U_BUFFER_OVERFLOW_ERROR);
380 380
381 if (U_FAILURE(err)) { 381 if (U_FAILURE(err)) {
382 // flush the converter so it can be reused, and not be bothered by this error. 382 // flush the converter so it can be reused, and not be bothered by this
383 // error.
383 do { 384 do {
384 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true, 385 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true,
385 err); 386 err);
386 } while (source < sourceLimit); 387 } while (source < sourceLimit);
387 sawError = true; 388 sawError = true;
388 } 389 }
389 390
390 #if !defined(USING_SYSTEM_ICU) 391 #if !defined(USING_SYSTEM_ICU)
391 // Chrome's copy of ICU does not have the issue described below. 392 // Chrome's copy of ICU does not have the issue described below.
392 return result.toString(); 393 return result.toString();
393 #else 394 #else
394 String resultString = result.toString(); 395 String resultString = result.toString();
395 396
396 // <http://bugs.webkit.org/show_bug.cgi?id=17014> 397 // <http://bugs.webkit.org/show_bug.cgi?id=17014>
397 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5. 398 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but
399 // ICU decodes it as U+E5E5.
398 if (!strcmp(m_encoding.name(), "GBK")) { 400 if (!strcmp(m_encoding.name(), "GBK")) {
399 if (!strcasecmp(m_encoding.name(), "gb18030")) 401 if (!strcasecmp(m_encoding.name(), "gb18030"))
400 resultString.replace(0xE5E5, ideographicSpaceCharacter); 402 resultString.replace(0xE5E5, ideographicSpaceCharacter);
401 // Make GBK compliant to the encoding spec and align with GB18030 403 // Make GBK compliant to the encoding spec and align with GB18030
402 resultString.replace(0x01F9, 0xE7C8); 404 resultString.replace(0x01F9, 0xE7C8);
403 // FIXME: Once https://www.w3.org/Bugs/Public/show_bug.cgi?id=28740#c3 405 // FIXME: Once https://www.w3.org/Bugs/Public/show_bug.cgi?id=28740#c3
404 // is resolved, add U+1E3F => 0xE7C7. 406 // is resolved, add U+1E3F => 0xE7C7.
405 } 407 }
406 408
407 return resultString; 409 return resultString;
408 #endif 410 #endif
409 } 411 }
410 412
411 #if defined(USING_SYSTEM_ICU) 413 #if defined(USING_SYSTEM_ICU)
412 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding 414 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding
413 // spec, but ICU converter does not have them. 415 // spec, but ICU converter does not have them.
414 static UChar fallbackForGBK(UChar32 character) { 416 static UChar fallbackForGBK(UChar32 character) {
415 switch (character) { 417 switch (character) {
416 case 0x01F9: 418 case 0x01F9:
417 return 0xE7C8; // mapped to xA8xBF by ICU. 419 return 0xE7C8; // mapped to xA8xBF by ICU.
418 case 0x1E3F: 420 case 0x1E3F:
419 return 0xE7C7; // mapped to xA8xBC by ICU. 421 return 0xE7C7; // mapped to xA8xBC by ICU.
420 } 422 }
421 return 0; 423 return 0;
422 } 424 }
423 #endif 425 #endif
424 426
425 // Generic helper for writing escaped entities using the specfied UnencodableHandling. 427 // Generic helper for writing escaped entities using the specfied
428 // UnencodableHandling.
426 static void formatEscapedEntityCallback(const void* context, 429 static void formatEscapedEntityCallback(const void* context,
427 UConverterFromUnicodeArgs* fromUArgs, 430 UConverterFromUnicodeArgs* fromUArgs,
428 const UChar* codeUnits, 431 const UChar* codeUnits,
429 int32_t length, 432 int32_t length,
430 UChar32 codePoint, 433 UChar32 codePoint,
431 UConverterCallbackReason reason, 434 UConverterCallbackReason reason,
432 UErrorCode* err, 435 UErrorCode* err,
433 UnencodableHandling handling) { 436 UnencodableHandling handling) {
434 if (reason == UCNV_UNASSIGNED) { 437 if (reason == UCNV_UNASSIGNED) {
435 *err = U_ZERO_ERROR; 438 *err = U_ZERO_ERROR;
(...skipping 13 matching lines...) Expand all
449 const UChar* codeUnits, 452 const UChar* codeUnits,
450 int32_t length, 453 int32_t length,
451 UChar32 codePoint, 454 UChar32 codePoint,
452 UConverterCallbackReason reason, 455 UConverterCallbackReason reason,
453 UErrorCode* err) { 456 UErrorCode* err) {
454 formatEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, 457 formatEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint,
455 reason, err, EntitiesForUnencodables); 458 reason, err, EntitiesForUnencodables);
456 } 459 }
457 460
458 // Invalid character handler when writing escaped entities in CSS encoding for 461 // Invalid character handler when writing escaped entities in CSS encoding for
459 // unrepresentable characters. See the declaration of TextCodec::encode for more. 462 // unrepresentable characters. See the declaration of TextCodec::encode for
463 // more.
460 static void cssEscapedEntityCallback(const void* context, 464 static void cssEscapedEntityCallback(const void* context,
461 UConverterFromUnicodeArgs* fromUArgs, 465 UConverterFromUnicodeArgs* fromUArgs,
462 const UChar* codeUnits, 466 const UChar* codeUnits,
463 int32_t length, 467 int32_t length,
464 UChar32 codePoint, 468 UChar32 codePoint,
465 UConverterCallbackReason reason, 469 UConverterCallbackReason reason,
466 UErrorCode* err) { 470 UErrorCode* err) {
467 formatEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, 471 formatEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint,
468 reason, err, CSSEncodedEntitiesForUnencodables); 472 reason, err, CSSEncodedEntitiesForUnencodables);
469 } 473 }
470 474
471 // Invalid character handler when writing escaped entities in HTML/XML encoding for 475 // Invalid character handler when writing escaped entities in HTML/XML encoding
472 // unrepresentable characters. See the declaration of TextCodec::encode for more. 476 // for unrepresentable characters. See the declaration of TextCodec::encode for
477 // more.
473 static void urlEscapedEntityCallback(const void* context, 478 static void urlEscapedEntityCallback(const void* context,
474 UConverterFromUnicodeArgs* fromUArgs, 479 UConverterFromUnicodeArgs* fromUArgs,
475 const UChar* codeUnits, 480 const UChar* codeUnits,
476 int32_t length, 481 int32_t length,
477 UChar32 codePoint, 482 UChar32 codePoint,
478 UConverterCallbackReason reason, 483 UConverterCallbackReason reason,
479 UErrorCode* err) { 484 UErrorCode* err) {
480 formatEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, 485 formatEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint,
481 reason, err, URLEncodedEntitiesForUnencodables); 486 reason, err, URLEncodedEntitiesForUnencodables);
482 } 487 }
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after
693 return encodeCommon(characters, length, handling); 698 return encodeCommon(characters, length, handling);
694 } 699 }
695 700
696 CString TextCodecICU::encode(const LChar* characters, 701 CString TextCodecICU::encode(const LChar* characters,
697 size_t length, 702 size_t length,
698 UnencodableHandling handling) { 703 UnencodableHandling handling) {
699 return encodeCommon(characters, length, handling); 704 return encodeCommon(characters, length, handling);
700 } 705 }
701 706
702 } // namespace WTF 707 } // namespace WTF
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698