Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(16)

Side by Side Diff: public/i18n/unicode/search.h

Issue 18836004: Move ICU headers from public/{common,i18n} to source/{common,i18n} (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu46.git@master
Patch Set: same as ps #3. retry uploading Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « public/i18n/unicode/regex.h ('k') | public/i18n/unicode/selfmt.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 **********************************************************************
3 * Copyright (C) 2001-2008 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
8 */
9
10 #ifndef SEARCH_H
11 #define SEARCH_H
12
13 #include "unicode/utypes.h"
14
15 /**
16 * \file
17 * \brief C++ API: SearchIterator object.
18 */
19
20 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
21
22 #include "unicode/uobject.h"
23 #include "unicode/unistr.h"
24 #include "unicode/chariter.h"
25 #include "unicode/brkiter.h"
26 #include "unicode/usearch.h"
27
28 /** Forward declaration of the C search data struct (SearchIterator keeps a pointer to it in m_search_).
29 * @stable ICU 2.0
30 */
31 struct USearch;
32 /** Typedef that lets the struct be referred to as plain USearch.
33 * @stable ICU 2.0
34 */
35 typedef struct USearch USearch;
36
37 U_NAMESPACE_BEGIN
38
39 /**
40 *
41 * <tt>SearchIterator</tt> is an abstract base class that provides
42 * methods to search for a pattern within a text string. Instances of
43 * <tt>SearchIterator</tt> maintain a current position and scan over the
44 * target text, returning the indices at which the pattern is matched and the
45 * length of each match.
46 * <p>
47 * <tt>SearchIterator</tt> defines a protocol for text searching.
48 * Subclasses provide concrete implementations of various search algorithms.
49 * For example, <tt>StringSearch</tt> implements language-sensitive pattern
50 * matching based on the comparison rules defined in a
51 * <tt>RuleBasedCollator</tt> object.
52 * <p>
53 * Other options for searching include using a BreakIterator to restrict
54 * the points at which matches are detected.
55 * <p>
56 * <tt>SearchIterator</tt> provides an API that is similar to that of
57 * other text iteration classes such as <tt>BreakIterator</tt>. Using
58 * this class, it is easy to scan through text looking for all occurrences of
59 * a given pattern. The following example uses a <tt>StringSearch</tt>
60 * object to find all instances of "fox" in the target string. Any other
61 * subclass of <tt>SearchIterator</tt> can be used in an identical
62 * manner.
63 * <pre><code>
64 * UnicodeString target("The quick brown fox jumped over the lazy fox");
65 * UnicodeString pattern("fox");
66 *
67 * SearchIterator *iter = new StringSearch(pattern, target);
68 * UErrorCode error = U_ZERO_ERROR;
69 * for (int pos = iter->first(error); pos != USEARCH_DONE;
70 * pos = iter->next(error)) {
71 * printf("Found match at %d pos, length is %d\n", pos,
72 * iter->getMatchedLength());
73 * }
74 * </code></pre>
75 *
76 * @see StringSearch
77 * @see RuleBasedCollator
78 */
79 class U_I18N_API SearchIterator : public UObject {
80
81 public:
82
83 // public constructors and destructors -------------------------------
84
85 /**
86 * Copy constructor that creates a SearchIterator instance with the same
87 * behavior, and iterating over the same text.
88 * @param other the SearchIterator instance to be copied.
89 * @stable ICU 2.0
90 */
91 SearchIterator(const SearchIterator &other);
92
93 /**
94 * Destructor. Cleans up the search iterator data struct.
95 * @stable ICU 2.0
96 */
97 virtual ~SearchIterator();
98
99 // public get and set methods ----------------------------------------
100
101 /**
102 * Sets the index to point to the given position, and clears any state
103 * that's affected.
104 * <p>
105 * This method takes the argument index and sets the position in the text
106 * string accordingly without checking if the index is pointing to a
107 * valid starting point to begin searching.
108 * @param position within the text to be set. If position is less
109 * than or greater than the text range for searching,
110 * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
111 * @param status for errors if it occurs
112 * @stable ICU 2.0
113 */
114 virtual void setOffset(int32_t position, UErrorCode &status) = 0;
115
116 /**
117 * Return the current index in the text being searched.
118 * If the iteration has gone past the end of the text
119 * (or past the beginning for a backwards search), USEARCH_DONE
120 * is returned.
121 * @return current index in the text being searched.
122 * @stable ICU 2.0
123 */
124 virtual int32_t getOffset(void) const = 0;
125
126 /**
127 * Sets the text searching attributes located in the enum
128 * USearchAttribute with values from the enum USearchAttributeValue.
129 * USEARCH_DEFAULT can be used for all attributes for resetting.
130 * @param attribute text attribute (enum USearchAttribute) to be set
131 * @param value text attribute value
132 * @param status for errors if it occurs
133 * @stable ICU 2.0
134 */
135 void setAttribute(USearchAttribute attribute,
136 USearchAttributeValue value,
137 UErrorCode &status);
138
139 /**
140 * Gets the text searching attributes
141 * @param attribute text attribute (enum USearchAttribute) to be retrieved
142 * @return text attribute value
143 * @stable ICU 2.0
144 */
145 USearchAttributeValue getAttribute(USearchAttribute attribute) const;
146
147 /**
148 * Returns the index to the match in the text string that was searched.
149 * This call returns a valid result only after a successful call to
150 * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
151 * Just after construction, or after a searching method returns
152 * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
153 * <p>
154 * Use getMatchedLength to get the matched string length.
155 * @return index of a substring within the text string that is being
156 * searched.
157 * @see #first
158 * @see #next
159 * @see #previous
160 * @see #last
161 * @stable ICU 2.0
162 */
163 int32_t getMatchedStart(void) const;
164
165 /**
166 * Returns the length of text in the string which matches the search
167 * pattern. This call returns a valid result only after a successful call
168 * to <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
169 * Just after construction, or after a searching method returns
170 * <tt>USEARCH_DONE</tt>, this method will return 0.
171 * @return The length of the match in the target text, or 0 if there
172 * is no match currently.
173 * @see #first
174 * @see #next
175 * @see #previous
176 * @see #last
177 * @stable ICU 2.0
178 */
179 int32_t getMatchedLength(void) const;
180
181 /**
182 * Returns the text that was matched by the most recent call to
183 * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
184 * If the iterator is not pointing at a valid match (e.g. just after
185 * construction or after <tt>USEARCH_DONE</tt> has been returned),
186 * returns an empty string.
187 * @param result stores the matched string or an empty string if a match
188 * is not found.
189 * @see #first
190 * @see #next
191 * @see #previous
192 * @see #last
193 * @stable ICU 2.0
194 */
195 void getMatchedText(UnicodeString &result) const;
196
197 /**
198 * Set the BreakIterator that will be used to restrict the points
199 * at which matches are detected. The user is responsible for deleting
200 * the breakiterator.
201 * @param breakiter A BreakIterator that will be used to restrict the
202 * points at which matches are detected. If a match is
203 * found, but the match's start or end index is not a
204 * boundary as determined by the <tt>BreakIterator</tt>,
205 * the match will be rejected and another will be searched
206 * for. If this parameter is <tt>NULL</tt>, no break
207 * detection is attempted.
208 * @param status for errors if it occurs
209 * @see BreakIterator
210 * @stable ICU 2.0
211 */
212 void setBreakIterator(BreakIterator *breakiter, UErrorCode &status);
213
214 /**
215 * Returns the BreakIterator that is used to restrict the points at
216 * which matches are detected. This will be the same object that was
217 * passed to the constructor or to <tt>setBreakIterator</tt>.
218 * Note that <tt>NULL</tt> is a legal value; it means that break
219 * detection should not be attempted.
220 * @return BreakIterator used to restrict matchings.
221 * @see #setBreakIterator
222 * @stable ICU 2.0
223 */
224 const BreakIterator * getBreakIterator(void) const;
225
226 /**
227 * Set the string text to be searched. Text iteration will hence begin at
228 * the start of the text string. This method is useful if you want to
229 * re-use an iterator to search for the same pattern within a different
230 * body of text. The user is responsible for deleting the text.
231 * @param text string to be searched.
232 * @param status for errors. If the text length is 0,
233 * an U_ILLEGAL_ARGUMENT_ERROR is returned.
234 * @stable ICU 2.0
235 */
236 virtual void setText(const UnicodeString &text, UErrorCode &status);
237
238 /**
239 * Set the string text to be searched. Text iteration will hence begin at
240 * the start of the text string. This method is useful if you want to
241 * re-use an iterator to search for the same pattern within a different
242 * body of text.
243 * <p>
244 * Note: No parsing of the text within the <tt>CharacterIterator</tt>
245 * will be done during searching for this version. The block of text
246 * in <tt>CharacterIterator</tt> will be used as it is.
247 * The user is responsible for deleting the text.
248 * @param text string iterator to be searched.
249 * @param status for errors if any. If the text length is 0 then an
250 * U_ILLEGAL_ARGUMENT_ERROR is returned.
251 * @stable ICU 2.0
252 */
253 virtual void setText(CharacterIterator &text, UErrorCode &status);
254
255 /**
256 * Return the string text to be searched.
257 * @return text string to be searched.
258 * @stable ICU 2.0
259 */
260 const UnicodeString & getText(void) const;
261
262 // operator overloading ----------------------------------------------
263
264 /**
265 * Equality operator.
266 * @param that SearchIterator instance to be compared.
267 * @return TRUE if both SearchIterators are of the same class, have the
268 * same behavior, iterate over the same text and have the same
269 * attributes. FALSE otherwise.
270 * @stable ICU 2.0
271 */
272 virtual UBool operator==(const SearchIterator &that) const;
273
274 /**
275 * Not-equal operator.
276 * @param that SearchIterator instance to be compared.
277 * @return FALSE if operator== returns TRUE, and vice versa.
278 * @stable ICU 2.0
279 */
280 UBool operator!=(const SearchIterator &that) const;
281
282 // public methods ----------------------------------------------------
283
284 /**
285 * Returns a copy of SearchIterator with the same behavior, and
286 * iterating over the same text, as this one. Note that all data will be
287 * replicated, except for the text string to be searched.
288 * @return cloned object
289 * @stable ICU 2.0
290 */
291 virtual SearchIterator* safeClone(void) const = 0;
292
293 /**
294 * Returns the first index at which the string text matches the search
295 * pattern. The iterator is adjusted so that its current index (as
296 * returned by <tt>getOffset</tt>) is the match position if one
297 * was found.
298 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
299 * the iterator will be adjusted to the index USEARCH_DONE.
300 * @param status for errors if it occurs
301 * @return The character index of the first match, or
302 * <tt>USEARCH_DONE</tt> if there are no matches.
303 * @see #getOffset
304 * @stable ICU 2.0
305 */
306 int32_t first(UErrorCode &status);
307
308 /**
309 * Returns the first index greater than <tt>position</tt> at which the
310 * string text matches the search pattern. The iterator is adjusted so
311 * that its current index (as returned by <tt>getOffset</tt>) is the
312 * match position if one was found. If a match is not found,
313 * <tt>USEARCH_DONE</tt> will be returned and the iterator will be
314 * adjusted to the index USEARCH_DONE.
315 * @param position where search is to start from. If position is less
316 * than or greater than the text range for searching,
317 * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
318 * @param status for errors if it occurs
319 * @return The character index of the first match following
320 * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are no
321 * matches.
322 * @see #getOffset
323 * @stable ICU 2.0
324 */
325 int32_t following(int32_t position, UErrorCode &status);
326
327 /**
328 * Returns the last index in the target text at which it matches the
329 * search pattern. The iterator is adjusted so that its current index
330 * (as returned by <tt>getOffset</tt>) is the match position if one was
331 * found.
332 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
333 * the iterator will be adjusted to the index USEARCH_DONE.
334 * @param status for errors if it occurs
335 * @return The index of the last match, or <tt>USEARCH_DONE</tt> if
336 * there are no matches.
337 * @see #getOffset
338 * @stable ICU 2.0
339 */
340 int32_t last(UErrorCode &status);
341
342 /**
343 * Returns the first index less than <tt>position</tt> at which the string
344 * text matches the search pattern. The iterator is adjusted so that its
345 * current index (as returned by <tt>getOffset</tt>) is the match
346 * position if one was found. If a match is not found,
347 * <tt>USEARCH_DONE</tt> will be returned and the iterator will be
348 * adjusted to the index USEARCH_DONE.
349 * @param position where search is to start from. If position is less
350 * than or greater than the text range for searching,
351 * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
352 * @param status for errors if it occurs
353 * @return The character index of the first match preceding
354 * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are
355 * no matches.
356 * @see #getOffset
357 * @stable ICU 2.0
358 */
359 int32_t preceding(int32_t position, UErrorCode &status);
360
361 /**
362 * Returns the index of the next point at which the text matches the
363 * search pattern, starting from the current position.
364 * The iterator is adjusted so that its current index (as returned by
365 * <tt>getOffset</tt>) is the match position if one was found.
366 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
367 * the iterator will be adjusted to a position after the end of the text
368 * string.
369 * @param status for errors if it occurs
370 * @return The index of the next match after the current position,
371 * or <tt>USEARCH_DONE</tt> if there are no more matches.
372 * @see #getOffset
373 * @stable ICU 2.0
374 */
375 int32_t next(UErrorCode &status);
376
377 /**
378 * Returns the index of the previous point at which the string text
379 * matches the search pattern, starting at the current position.
380 * The iterator is adjusted so that its current index (as returned by
381 * <tt>getOffset</tt>) is the match position if one was found.
382 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
383 * the iterator will be adjusted to the index USEARCH_DONE.
384 * @param status for errors if it occurs
385 * @return The index of the previous match before the current position,
386 * or <tt>USEARCH_DONE</tt> if there are no more matches.
387 * @see #getOffset
388 * @stable ICU 2.0
389 */
390 int32_t previous(UErrorCode &status);
391
392 /**
393 * Resets the iteration.
394 * Search will begin at the start of the text string if a forward
395 * iteration is initiated before a backwards iteration. Otherwise if a
396 * backwards iteration is initiated before a forwards iteration, the
397 * search will begin at the end of the text string.
398 * @stable ICU 2.0
399 */
400 virtual void reset();
401
402 protected:
403 // protected data members ---------------------------------------------
404
405 /**
406 * C search data struct
407 * @stable ICU 2.0
408 */
409 USearch *m_search_;
410
411 /**
412 * Break iterator.
413 * Currently the C++ breakiterator does not have getRules etc to reproduce
414 * another in C. Hence we keep the original around and do the verification
415 * at the end of the match. The user is responsible for deleting this
416 * break iterator.
417 * @stable ICU 2.0
418 */
419 BreakIterator *m_breakiterator_;
420
421 /**
422 * Unicode string version of the search text
423 * @stable ICU 2.0
424 */
425 UnicodeString m_text_;
426
427 // protected constructors and destructors -----------------------------
428
429 /**
430 * Default constructor.
431 * Initializes data to the default values.
432 * @stable ICU 2.0
433 */
434 SearchIterator();
435
436 /**
437 * Constructor for use by subclasses.
438 * @param text The target text to be searched.
439 * @param breakiter A {@link BreakIterator} that is used to restrict the
440 * points at which matches are detected. If
441 * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a
442 * match, but the match's start or end index is not a
443 * boundary as determined by the <tt>BreakIterator</tt>,
444 * the match is rejected and <tt>handleNext</tt> or
445 * <tt>handlePrev</tt> is called again. If this parameter
446 * is <tt>NULL</tt>, no break detection is attempted.
447 * @see #handleNext
448 * @see #handlePrev
449 * @stable ICU 2.0
450 */
451 SearchIterator(const UnicodeString &text,
452 BreakIterator *breakiter = NULL);
453
454 /**
455 * Constructor for use by subclasses.
456 * <p>
457 * Note: No parsing of the text within the <tt>CharacterIterator</tt>
458 * will be done during searching for this version. The block of text
459 * in <tt>CharacterIterator</tt> will be used as it is.
460 * @param text The target text to be searched.
461 * @param breakiter A {@link BreakIterator} that is used to restrict the
462 * points at which matches are detected. If
463 * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a
464 * match, but the match's start or end index is not a
465 * boundary as determined by the <tt>BreakIterator</tt>,
466 * the match is rejected and <tt>handleNext</tt> or
467 * <tt>handlePrev</tt> is called again. If this parameter
468 * is <tt>NULL</tt>, no break detection is attempted.
469 * @see #handleNext
470 * @see #handlePrev
471 * @stable ICU 2.0
472 */
473 SearchIterator(CharacterIterator &text, BreakIterator *breakiter = NULL);
474
475 // protected methods --------------------------------------------------
476
477 /**
478 * Assignment operator. Sets this iterator to have the same behavior,
479 * and iterate over the same text, as the one passed in.
480 * @param that instance to be copied.
481 * @stable ICU 2.0
482 */
483 SearchIterator & operator=(const SearchIterator &that);
484
485 /**
486 * Abstract method which subclasses override to provide the mechanism
487 * for finding the next match in the target text. This allows different
488 * subclasses to provide different search algorithms.
489 * <p>
490 * If a match is found, the implementation should return the index at
491 * which the match starts and should call
492 * <tt>setMatchLength</tt> with the number of characters
493 * in the target text that make up the match. If no match is found, the
494 * method should return USEARCH_DONE.
495 * <p>
496 * @param position The index in the target text at which the search
497 * should start.
498 * @param status for error codes if it occurs.
499 * @return index at which the match starts, else if match is not found
500 * USEARCH_DONE is returned
501 * @see #setMatchLength
502 * @stable ICU 2.0
503 */
504 virtual int32_t handleNext(int32_t position, UErrorCode &status)
505 = 0;
506
507 /**
508 * Abstract method which subclasses override to provide the mechanism for
509 * finding the previous match in the target text. This allows different
510 * subclasses to provide different search algorithms.
511 * <p>
512 * If a match is found, the implementation should return the index at
513 * which the match starts and should call
514 * <tt>setMatchLength</tt> with the number of characters
515 * in the target text that make up the match. If no match is found, the
516 * method should return USEARCH_DONE.
517 * <p>
518 * @param position The index in the target text at which the search
519 * should start.
520 * @param status for error codes if it occurs.
521 * @return index at which the match starts, else if match is not found
522 * USEARCH_DONE is returned
523 * @see #setMatchLength
524 * @stable ICU 2.0
525 */
526 virtual int32_t handlePrev(int32_t position, UErrorCode &status)
527 = 0;
528
529 /**
530 * Sets the length of the currently matched string in the text string to
531 * be searched.
532 * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
533 * methods should call this when they find a match in the target text.
534 * @param length length of the matched text.
535 * @see #handleNext
536 * @see #handlePrev
537 * @stable ICU 2.0
538 */
539 virtual void setMatchLength(int32_t length);
540
541 /**
542 * Sets the offset of the currently matched string in the text string to
543 * be searched.
544 * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
545 * methods should call this when they find a match in the target text.
546 * @param position start offset of the matched text.
547 * @see #handleNext
548 * @see #handlePrev
549 * @stable ICU 2.0
550 */
551 virtual void setMatchStart(int32_t position);
552
553 /**
554 * Sets match not found.
555 * @stable ICU 2.0
556 */
557 void setMatchNotFound();
558 };
559
560 inline UBool SearchIterator::operator!=(const SearchIterator &that) const // inline definition of the not-equal operator declared in the class
561 {
562 return !operator==(that); // negates operator==, which is declared virtual in the class
563 }
564 U_NAMESPACE_END
565
566 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
567
568 #endif
569
OLDNEW
« no previous file with comments | « public/i18n/unicode/regex.h ('k') | public/i18n/unicode/selfmt.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698