third_party/WebKit/Source/platform/text/UnicodeUtilities.cpp - Issue 2385283002: reflow comments in platform/{testing,text}

Side by Side Diff: third_party/WebKit/Source/platform/text/UnicodeUtilities.cpp

Issue 2385283002: reflow comments in platform/{testing,text} (Closed)

Patch Set: idunnolol Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/WebKit/Source/platform/text/UnicodeRange.cpp ('k') | third_party/WebKit/Source/platform/text/UnicodeUtilitiesTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.	2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All

	3 * rights reserved.

3 * Copyright (C) 2005 Alexey Proskuryakov.	4 * Copyright (C) 2005 Alexey Proskuryakov.

4 *	5 *

5 * Redistribution and use in source and binary forms, with or without	6 * Redistribution and use in source and binary forms, with or without

6 * modification, are permitted provided that the following conditions	7 * modification, are permitted provided that the following conditions

7 * are met:	8 * are met:

8 * 1. Redistributions of source code must retain the above copyright	9 * 1. Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	10 * notice, this list of conditions and the following disclaimer.

10 * 2. Redistributions in binary form must reproduce the above copyright	11 * 2. Redistributions in binary form must reproduce the above copyright

11 * notice, this list of conditions and the following disclaimer in the	12 * notice, this list of conditions and the following disclaimer in the

12 * documentation and/or other materials provided with the distribution.	13 * documentation and/or other materials provided with the distribution.

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
45 switch (static_cast<UChar>(c)) {	46 switch (static_cast<UChar>(c)) {

46 case hebrewPunctuationGershayimCharacter:	47 case hebrewPunctuationGershayimCharacter:

47 case leftDoubleQuotationMarkCharacter:	48 case leftDoubleQuotationMarkCharacter:

48 case rightDoubleQuotationMarkCharacter:	49 case rightDoubleQuotationMarkCharacter:

49 return '"';	50 return '"';

50 case hebrewPunctuationGereshCharacter:	51 case hebrewPunctuationGereshCharacter:

51 case leftSingleQuotationMarkCharacter:	52 case leftSingleQuotationMarkCharacter:

52 case rightSingleQuotationMarkCharacter:	53 case rightSingleQuotationMarkCharacter:

53 return '\'';	54 return '\'';

54 case softHyphenCharacter:	55 case softHyphenCharacter:

55 // Replace soft hyphen with an ignorable character so that their presence or absence will	56 // Replace soft hyphen with an ignorable character so that their presence

	57 // or absence will

56 // not affect string comparison.	58 // not affect string comparison.

57 return 0;	59 return 0;

58 default:	60 default:

59 return c;	61 return c;

60 }	62 }

61 }	63 }

62	64

63 void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length) {	65 void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length) {

64 for (size_t i = 0; i < length; ++i)	66 for (size_t i = 0; i < length; ++i)

65 data[i] = foldQuoteMarkOrSoftHyphen(data[i]);	67 data[i] = foldQuoteMarkOrSoftHyphen(data[i]);

66 }	68 }

67	69

68 void foldQuoteMarksAndSoftHyphens(String& s) {	70 void foldQuoteMarksAndSoftHyphens(String& s) {

69 s.replace(hebrewPunctuationGereshCharacter, '\'');	71 s.replace(hebrewPunctuationGereshCharacter, '\'');

70 s.replace(hebrewPunctuationGershayimCharacter, '"');	72 s.replace(hebrewPunctuationGershayimCharacter, '"');

71 s.replace(leftDoubleQuotationMarkCharacter, '"');	73 s.replace(leftDoubleQuotationMarkCharacter, '"');

72 s.replace(leftSingleQuotationMarkCharacter, '\'');	74 s.replace(leftSingleQuotationMarkCharacter, '\'');

73 s.replace(rightDoubleQuotationMarkCharacter, '"');	75 s.replace(rightDoubleQuotationMarkCharacter, '"');

74 s.replace(rightSingleQuotationMarkCharacter, '\'');	76 s.replace(rightSingleQuotationMarkCharacter, '\'');

75 // Replace soft hyphen with an ignorable character so that their presence or absence will	77 // Replace soft hyphen with an ignorable character so that their presence or

	78 // absence will

76 // not affect string comparison.	79 // not affect string comparison.

77 s.replace(softHyphenCharacter, static_cast<UChar>('\0'));	80 s.replace(softHyphenCharacter, static_cast<UChar>('\0'));

78 }	81 }

79	82

80 static bool isNonLatin1Separator(UChar32 character) {	83 static bool isNonLatin1Separator(UChar32 character) {

81 DCHECK_GE(character, 256);	84 DCHECK_GE(character, 256);

82 return U_GET_GC_MASK(character) &	85 return U_GET_GC_MASK(character) &

83 (U_GC_S_MASK \| U_GC_P_MASK \| U_GC_Z_MASK \| U_GC_CF_MASK);	86 (U_GC_S_MASK \| U_GC_P_MASK \| U_GC_Z_MASK \| U_GC_CF_MASK);

84 }	87 }

85	88

86 bool isSeparator(UChar32 character) {	89 bool isSeparator(UChar32 character) {

87 static const bool	90 // clang-format off

88 latin1SeparatorTable[256] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	91 static const bool latin1SeparatorTable[256] = {

89 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	92 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

90 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	93 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

91 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . /	94 // space ! " # $ % & ' ( ) * + , - . /

92 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,	95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

93 1, // : ; < = > ?	96 // : ; < = > ?

94 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,

95 0, // @	98 // @

96 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,	99 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

97 1, // [ \ ] ^ _	100 // [ \ ] ^ _

98 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	101 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,

99 0, // `	102 // `

100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,	103 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

101 0, // { \| } ~	104 // { \| } ~

102 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	105 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,

103 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	106 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

104 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,	107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

105 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,	108 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,

106 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	109 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,

107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,	110 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

108 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	111 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,

109 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,	112 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

110 0, 0, 0, 0, 0, 0, 0, 0};	113 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0

111	114 };

	115 // clang-format on

112 if (character < 256)	116 if (character < 256)

113 return latin1SeparatorTable[character];	117 return latin1SeparatorTable[character];

114	118

115 return isNonLatin1Separator(character);	119 return isNonLatin1Separator(character);

116 }	120 }

117	121

118 // ICU's search ignores the distinction between small kana letters and ones	122 // ICU's search ignores the distinction between small kana letters and ones

119 // that are not small, and also characters that differ only in the voicing	123 // that are not small, and also characters that differ only in the voicing

120 // marks when considering only primary collation strength differences.	124 // marks when considering only primary collation strength differences.

121 // This is not helpful for end users, since these differences make words	125 // This is not helpful for end users, since these differences make words

(...skipping 184 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
306	310

307 if (status == U_ZERO_ERROR \|\| status == U_STRING_NOT_TERMINATED_WARNING)	311 if (status == U_ZERO_ERROR \|\| status == U_STRING_NOT_TERMINATED_WARNING)

308 return;	312 return;

309	313

310 status = U_ZERO_ERROR;	314 status = U_ZERO_ERROR;

311 unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize,	315 unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize,

312 &status);	316 &status);

313 ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);	317 ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);

314 }	318 }

315	319

316 // This function returns kNotFound if \|first\| and \|second\| contain different Kana letters.	320 // This function returns kNotFound if \|first\| and \|second\| contain different

317 // If \|first\| and \|second\| contain the same Kana letter	321 // Kana letters. If \|first\| and \|second\| contain the same Kana letter then

318 // then function returns offset in characters from \|first\|.	322 // function returns offset in characters from \|first\|.

319 // Pointers to both strings increase simultaneously so it is possible to use one offset value.	323 // Pointers to both strings increase simultaneously so it is possible to use

	324 // one offset value.

320 static inline size_t compareKanaLetterAndComposedVoicedSoundMarks(	325 static inline size_t compareKanaLetterAndComposedVoicedSoundMarks(

321 const UChar* first,	326 const UChar* first,

322 const UChar* firstEnd,	327 const UChar* firstEnd,

323 const UChar* second,	328 const UChar* second,

324 const UChar* secondEnd) {	329 const UChar* secondEnd) {

325 const UChar* start = first;	330 const UChar* start = first;

326 // Check for differences in the kana letter character itself.	331 // Check for differences in the kana letter character itself.

327 if (isSmallKanaLetter(*first) != isSmallKanaLetter(*second))	332 if (isSmallKanaLetter(*first) != isSmallKanaLetter(*second))

328 return kNotFound;	333 return kNotFound;

329 if (composedVoicedSoundMark(*first) != composedVoicedSoundMark(*second))	334 if (composedVoicedSoundMark(*first) != composedVoicedSoundMark(*second))

330 return kNotFound;	335 return kNotFound;

331 ++first;	336 ++first;

332 ++second;	337 ++second;

333	338

334 // Check for differences in combining voiced sound marks found after the letter.	339 // Check for differences in combining voiced sound marks found after the

	340 // letter.

335 while (true) {	341 while (true) {

336 const bool secondIsNotSoundMark =	342 const bool secondIsNotSoundMark =

337 second == secondEnd \|\| !isCombiningVoicedSoundMark(*second);	343 second == secondEnd \|\| !isCombiningVoicedSoundMark(*second);

338 if (first == firstEnd \|\| !isCombiningVoicedSoundMark(*first)) {	344 if (first == firstEnd \|\| !isCombiningVoicedSoundMark(*first)) {

339 return secondIsNotSoundMark ? first - start : kNotFound;	345 return secondIsNotSoundMark ? first - start : kNotFound;

340 }	346 }

341 if (secondIsNotSoundMark)	347 if (secondIsNotSoundMark)

342 return kNotFound;	348 return kNotFound;

343 if (*first != *second)	349 if (*first != *second)

344 return kNotFound;	350 return kNotFound;

345 ++first;	351 ++first;

346 ++second;	352 ++second;

347 }	353 }

348 }	354 }

349	355

350 bool checkOnlyKanaLettersInStrings(const UChar* firstData,	356 bool checkOnlyKanaLettersInStrings(const UChar* firstData,

351 unsigned firstLength,	357 unsigned firstLength,

352 const UChar* secondData,	358 const UChar* secondData,

353 unsigned secondLength) {	359 unsigned secondLength) {

354 const UChar* a = firstData;	360 const UChar* a = firstData;

355 const UChar* aEnd = firstData + firstLength;	361 const UChar* aEnd = firstData + firstLength;

356	362

357 const UChar* b = secondData;	363 const UChar* b = secondData;

358 const UChar* bEnd = secondData + secondLength;	364 const UChar* bEnd = secondData + secondLength;

359 while (true) {	365 while (true) {

360 // Skip runs of non-kana-letter characters. This is necessary so we can	366 // Skip runs of non-kana-letter characters. This is necessary so we can

361 // correctly handle strings where the \|firstData\| and \|secondData\| have different-length	367 // correctly handle strings where the \|firstData\| and \|secondData\| have

362 // runs of characters that match, while still double checking the correctness	368 // different-length runs of characters that match, while still double

363 // of matches of kana letters with other kana letters.	369 // checking the correctness of matches of kana letters with other kana

	370 // letters.

364 while (a != aEnd && !isKanaLetter(*a))	371 while (a != aEnd && !isKanaLetter(*a))

365 ++a;	372 ++a;

366 while (b != bEnd && !isKanaLetter(*b))	373 while (b != bEnd && !isKanaLetter(*b))

367 ++b;	374 ++b;

368	375

369 // If we reached the end of either the target or the match, we should have	376 // If we reached the end of either the target or the match, we should have

370 // reached the end of both; both should have the same number of kana letters.	377 // reached the end of both; both should have the same number of kana

	378 // letters.

371 if (a == aEnd \|\| b == bEnd) {	379 if (a == aEnd \|\| b == bEnd) {

372 return a == aEnd && b == bEnd;	380 return a == aEnd && b == bEnd;

373 }	381 }

374	382

375 // Check that single Kana letters in \|a\| and \|b\| are the same.	383 // Check that single Kana letters in \|a\| and \|b\| are the same.

376 const size_t offset =	384 const size_t offset =

377 compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd);	385 compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd);

378 if (offset == kNotFound)	386 if (offset == kNotFound)

379 return false;	387 return false;

380	388

(...skipping 13 matching lines...) Expand all Loading...
394 const UChar* b = secondData;	402 const UChar* b = secondData;

395 const UChar* bEnd = secondData + secondLength;	403 const UChar* bEnd = secondData + secondLength;

396 while (true) {	404 while (true) {

397 // Check for non-kana-letter characters.	405 // Check for non-kana-letter characters.

398 while (a != aEnd && !isKanaLetter(*a) && b != bEnd && !isKanaLetter(*b)) {	406 while (a != aEnd && !isKanaLetter(*a) && b != bEnd && !isKanaLetter(*b)) {

399 if (*a++ != *b++)	407 if (*a++ != *b++)

400 return false;	408 return false;

401 }	409 }

402	410

403 // If we reached the end of either the target or the match, we should have	411 // If we reached the end of either the target or the match, we should have

404 // reached the end of both; both should have the same number of kana letters.	412 // reached the end of both; both should have the same number of kana

	413 // letters.

405 if (a == aEnd \|\| b == bEnd) {	414 if (a == aEnd \|\| b == bEnd) {

406 return a == aEnd && b == bEnd;	415 return a == aEnd && b == bEnd;

407 }	416 }

408	417

409 if (isKanaLetter(*a) != isKanaLetter(*b))	418 if (isKanaLetter(*a) != isKanaLetter(*b))

410 return false;	419 return false;

411	420

412 // Check that single Kana letters in \|a\| and \|b\| are the same.	421 // Check that single Kana letters in \|a\| and \|b\| are the same.

413 const size_t offset =	422 const size_t offset =

414 compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd);	423 compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd);

415 if (offset == kNotFound)	424 if (offset == kNotFound)

416 return false;	425 return false;

417	426

418 // Update values of \|a\| and \|b\| after comparing.	427 // Update values of \|a\| and \|b\| after comparing.

419 a += offset;	428 a += offset;

420 b += offset;	429 b += offset;

421 }	430 }

422 }	431 }

423	432

424 } // namespace blink	433 } // namespace blink

OLD	NEW