Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(983)

Side by Side Diff: third_party/WebKit/Source/platform/text/UnicodeUtilities.cpp

Issue 2385283002: reflow comments in platform/{testing,text} (Closed)
Patch Set: idunnolol Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All
3 * rights reserved.
3 * Copyright (C) 2005 Alexey Proskuryakov. 4 * Copyright (C) 2005 Alexey Proskuryakov.
4 * 5 *
5 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
7 * are met: 8 * are met:
8 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 switch (static_cast<UChar>(c)) { 46 switch (static_cast<UChar>(c)) {
46 case hebrewPunctuationGershayimCharacter: 47 case hebrewPunctuationGershayimCharacter:
47 case leftDoubleQuotationMarkCharacter: 48 case leftDoubleQuotationMarkCharacter:
48 case rightDoubleQuotationMarkCharacter: 49 case rightDoubleQuotationMarkCharacter:
49 return '"'; 50 return '"';
50 case hebrewPunctuationGereshCharacter: 51 case hebrewPunctuationGereshCharacter:
51 case leftSingleQuotationMarkCharacter: 52 case leftSingleQuotationMarkCharacter:
52 case rightSingleQuotationMarkCharacter: 53 case rightSingleQuotationMarkCharacter:
53 return '\''; 54 return '\'';
54 case softHyphenCharacter: 55 case softHyphenCharacter:
55 // Replace soft hyphen with an ignorable character so that their presence or absence will 56 // Replace soft hyphen with an ignorable character so that their presence
57 // or absence will
56 // not affect string comparison. 58 // not affect string comparison.
57 return 0; 59 return 0;
58 default: 60 default:
59 return c; 61 return c;
60 } 62 }
61 } 63 }
62 64
63 void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length) { 65 void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length) {
64 for (size_t i = 0; i < length; ++i) 66 for (size_t i = 0; i < length; ++i)
65 data[i] = foldQuoteMarkOrSoftHyphen(data[i]); 67 data[i] = foldQuoteMarkOrSoftHyphen(data[i]);
66 } 68 }
67 69
68 void foldQuoteMarksAndSoftHyphens(String& s) { 70 void foldQuoteMarksAndSoftHyphens(String& s) {
69 s.replace(hebrewPunctuationGereshCharacter, '\''); 71 s.replace(hebrewPunctuationGereshCharacter, '\'');
70 s.replace(hebrewPunctuationGershayimCharacter, '"'); 72 s.replace(hebrewPunctuationGershayimCharacter, '"');
71 s.replace(leftDoubleQuotationMarkCharacter, '"'); 73 s.replace(leftDoubleQuotationMarkCharacter, '"');
72 s.replace(leftSingleQuotationMarkCharacter, '\''); 74 s.replace(leftSingleQuotationMarkCharacter, '\'');
73 s.replace(rightDoubleQuotationMarkCharacter, '"'); 75 s.replace(rightDoubleQuotationMarkCharacter, '"');
74 s.replace(rightSingleQuotationMarkCharacter, '\''); 76 s.replace(rightSingleQuotationMarkCharacter, '\'');
75 // Replace soft hyphen with an ignorable character so that their presence or absence will 77 // Replace soft hyphen with an ignorable character so that their presence or
78 // absence will
76 // not affect string comparison. 79 // not affect string comparison.
77 s.replace(softHyphenCharacter, static_cast<UChar>('\0')); 80 s.replace(softHyphenCharacter, static_cast<UChar>('\0'));
78 } 81 }
79 82
80 static bool isNonLatin1Separator(UChar32 character) { 83 static bool isNonLatin1Separator(UChar32 character) {
81 DCHECK_GE(character, 256); 84 DCHECK_GE(character, 256);
82 return U_GET_GC_MASK(character) & 85 return U_GET_GC_MASK(character) &
83 (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK); 86 (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK);
84 } 87 }
85 88
86 bool isSeparator(UChar32 character) { 89 bool isSeparator(UChar32 character) {
87 static const bool 90 // clang-format off
88 latin1SeparatorTable[256] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91 static const bool latin1SeparatorTable[256] = {
89 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
90 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 93 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
91 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . / 94 // space ! " # $ % & ' ( ) * + , - . /
92 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, // : ; < = > ? 96 // : ; < = > ?
94 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
95 0, // @ 98 // @
96 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 99 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
97 1, // [ \ ] ^ _ 100 // [ \ ] ^ _
98 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
99 0, // ` 102 // `
100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 103 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
101 0, // { | } ~ 104 // { | } ~
102 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 105 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
103 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
104 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
105 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 108 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 110 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
108 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
109 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 112 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
110 0, 0, 0, 0, 0, 0, 0, 0}; 113 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
111 114 };
115 // clang-format on
112 if (character < 256) 116 if (character < 256)
113 return latin1SeparatorTable[character]; 117 return latin1SeparatorTable[character];
114 118
115 return isNonLatin1Separator(character); 119 return isNonLatin1Separator(character);
116 } 120 }
117 121
118 // ICU's search ignores the distinction between small kana letters and ones 122 // ICU's search ignores the distinction between small kana letters and ones
119 // that are not small, and also characters that differ only in the voicing 123 // that are not small, and also characters that differ only in the voicing
120 // marks when considering only primary collation strength differences. 124 // marks when considering only primary collation strength differences.
121 // This is not helpful for end users, since these differences make words 125 // This is not helpful for end users, since these differences make words
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
306 310
307 if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) 311 if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING)
308 return; 312 return;
309 313
310 status = U_ZERO_ERROR; 314 status = U_ZERO_ERROR;
311 unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, 315 unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize,
312 &status); 316 &status);
313 ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 317 ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
314 } 318 }
315 319
316 // This function returns kNotFound if |first| and |second| contain different Kana letters. 320 // This function returns kNotFound if |first| and |second| contain different
317 // If |first| and |second| contain the same Kana letter 321 // Kana letters. If |first| and |second| contain the same Kana letter then
318 // then function returns offset in characters from |first|. 322 // function returns offset in characters from |first|.
319 // Pointers to both strings increase simultaneously so it is possible to use one offset value. 323 // Pointers to both strings increase simultaneously so it is possible to use
324 // one offset value.
320 static inline size_t compareKanaLetterAndComposedVoicedSoundMarks( 325 static inline size_t compareKanaLetterAndComposedVoicedSoundMarks(
321 const UChar* first, 326 const UChar* first,
322 const UChar* firstEnd, 327 const UChar* firstEnd,
323 const UChar* second, 328 const UChar* second,
324 const UChar* secondEnd) { 329 const UChar* secondEnd) {
325 const UChar* start = first; 330 const UChar* start = first;
326 // Check for differences in the kana letter character itself. 331 // Check for differences in the kana letter character itself.
327 if (isSmallKanaLetter(*first) != isSmallKanaLetter(*second)) 332 if (isSmallKanaLetter(*first) != isSmallKanaLetter(*second))
328 return kNotFound; 333 return kNotFound;
329 if (composedVoicedSoundMark(*first) != composedVoicedSoundMark(*second)) 334 if (composedVoicedSoundMark(*first) != composedVoicedSoundMark(*second))
330 return kNotFound; 335 return kNotFound;
331 ++first; 336 ++first;
332 ++second; 337 ++second;
333 338
334 // Check for differences in combining voiced sound marks found after the letter. 339 // Check for differences in combining voiced sound marks found after the
340 // letter.
335 while (true) { 341 while (true) {
336 const bool secondIsNotSoundMark = 342 const bool secondIsNotSoundMark =
337 second == secondEnd || !isCombiningVoicedSoundMark(*second); 343 second == secondEnd || !isCombiningVoicedSoundMark(*second);
338 if (first == firstEnd || !isCombiningVoicedSoundMark(*first)) { 344 if (first == firstEnd || !isCombiningVoicedSoundMark(*first)) {
339 return secondIsNotSoundMark ? first - start : kNotFound; 345 return secondIsNotSoundMark ? first - start : kNotFound;
340 } 346 }
341 if (secondIsNotSoundMark) 347 if (secondIsNotSoundMark)
342 return kNotFound; 348 return kNotFound;
343 if (*first != *second) 349 if (*first != *second)
344 return kNotFound; 350 return kNotFound;
345 ++first; 351 ++first;
346 ++second; 352 ++second;
347 } 353 }
348 } 354 }
349 355
350 bool checkOnlyKanaLettersInStrings(const UChar* firstData, 356 bool checkOnlyKanaLettersInStrings(const UChar* firstData,
351 unsigned firstLength, 357 unsigned firstLength,
352 const UChar* secondData, 358 const UChar* secondData,
353 unsigned secondLength) { 359 unsigned secondLength) {
354 const UChar* a = firstData; 360 const UChar* a = firstData;
355 const UChar* aEnd = firstData + firstLength; 361 const UChar* aEnd = firstData + firstLength;
356 362
357 const UChar* b = secondData; 363 const UChar* b = secondData;
358 const UChar* bEnd = secondData + secondLength; 364 const UChar* bEnd = secondData + secondLength;
359 while (true) { 365 while (true) {
360 // Skip runs of non-kana-letter characters. This is necessary so we can 366 // Skip runs of non-kana-letter characters. This is necessary so we can
361 // correctly handle strings where the |firstData| and |secondData| have different-length 367 // correctly handle strings where the |firstData| and |secondData| have
362 // runs of characters that match, while still double checking the correctness 368 // different-length runs of characters that match, while still double
363 // of matches of kana letters with other kana letters. 369 // checking the correctness of matches of kana letters with other kana
370 // letters.
364 while (a != aEnd && !isKanaLetter(*a)) 371 while (a != aEnd && !isKanaLetter(*a))
365 ++a; 372 ++a;
366 while (b != bEnd && !isKanaLetter(*b)) 373 while (b != bEnd && !isKanaLetter(*b))
367 ++b; 374 ++b;
368 375
369 // If we reached the end of either the target or the match, we should have 376 // If we reached the end of either the target or the match, we should have
370 // reached the end of both; both should have the same number of kana letters. 377 // reached the end of both; both should have the same number of kana
378 // letters.
371 if (a == aEnd || b == bEnd) { 379 if (a == aEnd || b == bEnd) {
372 return a == aEnd && b == bEnd; 380 return a == aEnd && b == bEnd;
373 } 381 }
374 382
375 // Check that single Kana letters in |a| and |b| are the same. 383 // Check that single Kana letters in |a| and |b| are the same.
376 const size_t offset = 384 const size_t offset =
377 compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd); 385 compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd);
378 if (offset == kNotFound) 386 if (offset == kNotFound)
379 return false; 387 return false;
380 388
(...skipping 13 matching lines...) Expand all
394 const UChar* b = secondData; 402 const UChar* b = secondData;
395 const UChar* bEnd = secondData + secondLength; 403 const UChar* bEnd = secondData + secondLength;
396 while (true) { 404 while (true) {
397 // Check for non-kana-letter characters. 405 // Check for non-kana-letter characters.
398 while (a != aEnd && !isKanaLetter(*a) && b != bEnd && !isKanaLetter(*b)) { 406 while (a != aEnd && !isKanaLetter(*a) && b != bEnd && !isKanaLetter(*b)) {
399 if (*a++ != *b++) 407 if (*a++ != *b++)
400 return false; 408 return false;
401 } 409 }
402 410
403 // If we reached the end of either the target or the match, we should have 411 // If we reached the end of either the target or the match, we should have
404 // reached the end of both; both should have the same number of kana letters. 412 // reached the end of both; both should have the same number of kana
413 // letters.
405 if (a == aEnd || b == bEnd) { 414 if (a == aEnd || b == bEnd) {
406 return a == aEnd && b == bEnd; 415 return a == aEnd && b == bEnd;
407 } 416 }
408 417
409 if (isKanaLetter(*a) != isKanaLetter(*b)) 418 if (isKanaLetter(*a) != isKanaLetter(*b))
410 return false; 419 return false;
411 420
412 // Check that single Kana letters in |a| and |b| are the same. 421 // Check that single Kana letters in |a| and |b| are the same.
413 const size_t offset = 422 const size_t offset =
414 compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd); 423 compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd);
415 if (offset == kNotFound) 424 if (offset == kNotFound)
416 return false; 425 return false;
417 426
418 // Update values of |a| and |b| after comparing. 427 // Update values of |a| and |b| after comparing.
419 a += offset; 428 a += offset;
420 b += offset; 429 b += offset;
421 } 430 }
422 } 431 }
423 432
424 } // namespace blink 433 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698