OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All r
ights reserved. | 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All |
| 3 * rights reserved. |
3 * Copyright (C) 2005 Alexey Proskuryakov. | 4 * Copyright (C) 2005 Alexey Proskuryakov. |
4 * | 5 * |
5 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
6 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
7 * are met: | 8 * are met: |
8 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
10 * 2. Redistributions in binary form must reproduce the above copyright | 11 * 2. Redistributions in binary form must reproduce the above copyright |
11 * notice, this list of conditions and the following disclaimer in the | 12 * notice, this list of conditions and the following disclaimer in the |
12 * documentation and/or other materials provided with the distribution. | 13 * documentation and/or other materials provided with the distribution. |
(...skipping 25 matching lines...) Expand all Loading... |
38 | 39 |
39 namespace blink { | 40 namespace blink { |
40 | 41 |
// Floor for the initial capacity reserved for m_buffer: the constructor
// reserves std::max(targetLength * 8, minimumSearchBufferSize).
41 static const size_t minimumSearchBufferSize = 8192; | 42 static const size_t minimumSearchBufferSize = 8192; |
42 | 43 |
// Declared only in builds where DCHECK_IS_ON() is true.
// NOTE(review): the readers/writers of this flag are outside the visible
// lines; presumably it tracks whether the single global searcher is currently
// held -- confirm against lockSearcher() and its counterpart.
43 #if DCHECK_IS_ON() | 44 #if DCHECK_IS_ON() |
44 static bool searcherInUse; | 45 static bool searcherInUse; |
45 #endif | 46 #endif |
46 | 47 |
47 static UStringSearch* createSearcher() { | 48 static UStringSearch* createSearcher() { |
48 // Provide a non-empty pattern and non-empty text so usearch_open will not fai
l, | 49 // Provide a non-empty pattern and non-empty text so usearch_open will not |
49 // but it doesn't matter exactly what it is, since we don't perform any search
es | 50 // fail, but it doesn't matter exactly what it is, since we don't perform any |
50 // without setting both the pattern and the text. | 51 // searches without setting both the pattern and the text. |
51 UErrorCode status = U_ZERO_ERROR; | 52 UErrorCode status = U_ZERO_ERROR; |
52 String searchCollatorName = | 53 String searchCollatorName = |
53 currentSearchLocaleID() + String("@collation=search"); | 54 currentSearchLocaleID() + String("@collation=search"); |
54 UStringSearch* searcher = | 55 UStringSearch* searcher = |
55 usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, | 56 usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, |
56 searchCollatorName.utf8().data(), 0, &status); | 57 searchCollatorName.utf8().data(), 0, &status); |
57 DCHECK(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || | 58 DCHECK(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || |
58 status == U_USING_DEFAULT_WARNING) | 59 status == U_USING_DEFAULT_WARNING) |
59 << status; | 60 << status; |
60 return searcher; | 61 return searcher; |
(...skipping 22 matching lines...) Expand all Loading... |
83 : m_options(options), | 84 : m_options(options), |
84 m_prefixLength(0), | 85 m_prefixLength(0), |
85 m_numberOfCharactersJustAppended(0), | 86 m_numberOfCharactersJustAppended(0), |
86 m_atBreak(true), | 87 m_atBreak(true), |
87 m_needsMoreContext(options & AtWordStarts), | 88 m_needsMoreContext(options & AtWordStarts), |
88 m_targetRequiresKanaWorkaround(containsKanaLetters(target)) { | 89 m_targetRequiresKanaWorkaround(containsKanaLetters(target)) { |
89 DCHECK(!target.isEmpty()) << target; | 90 DCHECK(!target.isEmpty()) << target; |
90 target.appendTo(m_target); | 91 target.appendTo(m_target); |
91 | 92 |
92 // FIXME: We'd like to tailor the searcher to fold quote marks for us instead | 93 // FIXME: We'd like to tailor the searcher to fold quote marks for us instead |
93 // of doing it in a separate replacement pass here, but ICU doesn't offer a wa
y | 94 // of doing it in a separate replacement pass here, but ICU doesn't offer a |
94 // to add tailoring on top of the locale-specific tailoring as of this writing
. | 95 // way to add tailoring on top of the locale-specific tailoring as of this |
| 96 // writing. |
95 foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size()); | 97 foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size()); |
96 | 98 |
97 size_t targetLength = m_target.size(); | 99 size_t targetLength = m_target.size(); |
98 m_buffer.reserveInitialCapacity( | 100 m_buffer.reserveInitialCapacity( |
99 std::max(targetLength * 8, minimumSearchBufferSize)); | 101 std::max(targetLength * 8, minimumSearchBufferSize)); |
100 m_overlap = m_buffer.capacity() / 4; | 102 m_overlap = m_buffer.capacity() / 4; |
101 | 103 |
102 if ((m_options & AtWordStarts) && targetLength) { | 104 if ((m_options & AtWordStarts) && targetLength) { |
103 UChar32 targetFirstCharacter; | 105 UChar32 targetFirstCharacter; |
104 U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter); | 106 U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter); |
105 // Characters in the separator category never really occur at the beginning
of a word, | 107 // Characters in the separator category never really occur at the beginning |
106 // so if the target begins with such a character, we just ignore the AtWordS
tarts option. | 108 // of a word, so if the target begins with such a character, we just ignore |
| 109 // the AtWordStarts option. |
107 if (isSeparator(targetFirstCharacter)) { | 110 if (isSeparator(targetFirstCharacter)) { |
108 m_options &= ~AtWordStarts; | 111 m_options &= ~AtWordStarts; |
109 m_needsMoreContext = false; | 112 m_needsMoreContext = false; |
110 } | 113 } |
111 } | 114 } |
112 | 115 |
113 // Grab the single global searcher. | 116 // Grab the single global searcher. |
114 // If we ever have a reason to use more than one search buffer at once, we'll
have | 117 // If we ever have a reason to use more than one search buffer at once, we'll |
115 // to move to multiple searchers. | 118 // have to move to multiple searchers. |
116 lockSearcher(); | 119 lockSearcher(); |
117 | 120 |
118 UStringSearch* searcher = blink::searcher(); | 121 UStringSearch* searcher = blink::searcher(); |
119 UCollator* collator = usearch_getCollator(searcher); | 122 UCollator* collator = usearch_getCollator(searcher); |
120 | 123 |
121 UCollationStrength strength = | 124 UCollationStrength strength = |
122 m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY; | 125 m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY; |
123 if (ucol_getStrength(collator) != strength) { | 126 if (ucol_getStrength(collator) != strength) { |
124 ucol_setStrength(collator, strength); | 127 ucol_setStrength(collator, strength); |
125 usearch_reset(searcher); | 128 usearch_reset(searcher); |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
242 U16_PREV(m_buffer.data(), 0, offset, previousCharacter); | 245 U16_PREV(m_buffer.data(), 0, offset, previousCharacter); |
243 | 246 |
244 if (isSeparator(firstCharacter)) { | 247 if (isSeparator(firstCharacter)) { |
245 // The start of a separator run is a word start (".org" in "webkit.org"). | 248 // The start of a separator run is a word start (".org" in "webkit.org"). |
246 if (!isSeparator(previousCharacter)) | 249 if (!isSeparator(previousCharacter)) |
247 return true; | 250 return true; |
248 } else if (isASCIIUpper(firstCharacter)) { | 251 } else if (isASCIIUpper(firstCharacter)) { |
249 // The start of an uppercase run is a word start ("Kit" in "WebKit"). | 252 // The start of an uppercase run is a word start ("Kit" in "WebKit"). |
250 if (!isASCIIUpper(previousCharacter)) | 253 if (!isASCIIUpper(previousCharacter)) |
251 return true; | 254 return true; |
252 // The last character of an uppercase run followed by a non-separator, non
-digit | 255 // The last character of an uppercase run followed by a non-separator, |
253 // is a word start ("Request" in "XMLHTTPRequest"). | 256 // non-digit is a word start ("Request" in "XMLHTTPRequest"). |
254 offset = start; | 257 offset = start; |
255 U16_FWD_1(m_buffer.data(), offset, size); | 258 U16_FWD_1(m_buffer.data(), offset, size); |
256 UChar32 nextCharacter = 0; | 259 UChar32 nextCharacter = 0; |
257 if (offset < size) | 260 if (offset < size) |
258 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter); | 261 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter); |
259 if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && | 262 if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && |
260 !isSeparator(nextCharacter)) | 263 !isSeparator(nextCharacter)) |
261 return true; | 264 return true; |
262 } else if (isASCIIDigit(firstCharacter)) { | 265 } else if (isASCIIDigit(firstCharacter)) { |
263 // The start of a digit run is a word start ("2" in "WebKit2"). | 266 // The start of a digit run is a word start ("2" in "WebKit2"). |
264 if (!isASCIIDigit(previousCharacter)) | 267 if (!isASCIIDigit(previousCharacter)) |
265 return true; | 268 return true; |
266 } else if (isSeparator(previousCharacter) || | 269 } else if (isSeparator(previousCharacter) || |
267 isASCIIDigit(previousCharacter)) { | 270 isASCIIDigit(previousCharacter)) { |
268 // The start of a non-separator, non-uppercase, non-digit run is a word st
art, | 271 // The start of a non-separator, non-uppercase, non-digit run is a word |
269 // except after an uppercase. ("org" in "webkit.org", but not "ore" in "We
bCore"). | 272 // start, except after an uppercase. ("org" in "webkit.org", but not "ore" |
| 273 // in "WebCore"). |
270 return true; | 274 return true; |
271 } | 275 } |
272 } | 276 } |
273 | 277 |
274 // Chinese and Japanese lack word boundary marks, and there is no clear agreem
ent on what constitutes | 278 // Chinese and Japanese lack word boundary marks, and there is no clear |
275 // a word, so treat the position before any CJK character as a word start. | 279 // agreement on what constitutes a word, so treat the position before any CJK |
| 280 // character as a word start. |
276 if (Character::isCJKIdeographOrSymbol(firstCharacter)) | 281 if (Character::isCJKIdeographOrSymbol(firstCharacter)) |
277 return true; | 282 return true; |
278 | 283 |
279 size_t wordBreakSearchStart = start + length; | 284 size_t wordBreakSearchStart = start + length; |
280 while (wordBreakSearchStart > start) | 285 while (wordBreakSearchStart > start) |
281 wordBreakSearchStart = | 286 wordBreakSearchStart = |
282 findNextWordFromIndex(m_buffer.data(), m_buffer.size(), | 287 findNextWordFromIndex(m_buffer.data(), m_buffer.size(), |
283 wordBreakSearchStart, false /* backwards */); | 288 wordBreakSearchStart, false /* backwards */); |
284 if (wordBreakSearchStart != start) | 289 if (wordBreakSearchStart != start) |
285 return false; | 290 return false; |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
317 DCHECK_EQ(matchStart, USEARCH_DONE); | 322 DCHECK_EQ(matchStart, USEARCH_DONE); |
318 return 0; | 323 return 0; |
319 } | 324 } |
320 | 325 |
321 // Matches that start in the overlap area are only tentative. | 326 // Matches that start in the overlap area are only tentative. |
322 // The same match may appear later, matching more characters, | 327 // The same match may appear later, matching more characters, |
323 // possibly including a combining character that's not yet in the buffer. | 328 // possibly including a combining character that's not yet in the buffer. |
324 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { | 329 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { |
325 size_t overlap = m_overlap; | 330 size_t overlap = m_overlap; |
326 if (m_options & AtWordStarts) { | 331 if (m_options & AtWordStarts) { |
327 // Ensure that there is sufficient context before matchStart the next time
around for | 332 // Ensure that there is sufficient context before matchStart the next time |
328 // determining if it is at a word boundary. | 333 // around for determining if it is at a word boundary. |
329 int wordBoundaryContextStart = matchStart; | 334 int wordBoundaryContextStart = matchStart; |
330 U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); | 335 U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); |
331 wordBoundaryContextStart = startOfLastWordBoundaryContext( | 336 wordBoundaryContextStart = startOfLastWordBoundaryContext( |
332 m_buffer.data(), wordBoundaryContextStart); | 337 m_buffer.data(), wordBoundaryContextStart); |
333 overlap = std::min(size - 1, | 338 overlap = std::min(size - 1, |
334 std::max(overlap, size - wordBoundaryContextStart)); | 339 std::max(overlap, size - wordBoundaryContextStart)); |
335 } | 340 } |
336 memcpy(m_buffer.data(), m_buffer.data() + size - overlap, | 341 memcpy(m_buffer.data(), m_buffer.data() + size - overlap, |
337 overlap * sizeof(UChar)); | 342 overlap * sizeof(UChar)); |
338 m_prefixLength -= std::min(m_prefixLength, size - overlap); | 343 m_prefixLength -= std::min(m_prefixLength, size - overlap); |
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
482 | 487 |
// Entry point that takes an EphemeralRangeInFlatTree. It forwards inputRange,
// target and options unchanged to
// findPlainTextAlgorithm<EditingInFlatTreeStrategy> and returns that call's
// result as-is; no additional processing happens here.
483 EphemeralRangeInFlatTree findPlainText( | 488 EphemeralRangeInFlatTree findPlainText( |
484 const EphemeralRangeInFlatTree& inputRange, | 489 const EphemeralRangeInFlatTree& inputRange, |
485 const String& target, | 490 const String& target, |
486 FindOptions options) { | 491 FindOptions options) { |
487 return findPlainTextAlgorithm<EditingInFlatTreeStrategy>(inputRange, target, | 492 return findPlainTextAlgorithm<EditingInFlatTreeStrategy>(inputRange, target, |
488 options); | 493 options); |
489 } | 494 } |
490 | 495 |
491 } // namespace blink | 496 } // namespace blink |
OLD | NEW |